Compare commits


1 Commit

Author: Tamo
SHA1: 2141cb3b69
Message: add tests on the rest embedder
Date: 2024-07-01 12:05:02 +02:00
127 changed files with 1080 additions and 2619 deletions

View File

@@ -18,9 +18,11 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
run: |

View File

@@ -35,9 +35,11 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |

View File

@@ -12,9 +12,11 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,9 +18,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -13,9 +13,11 @@ jobs:
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Check for Command
id: command

View File

@@ -16,9 +16,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -1,6 +1,4 @@
name: Look for flaky tests
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on:
workflow_dispatch:
schedule:
@@ -18,7 +16,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@@ -1,6 +1,5 @@
name: Run the indexing fuzzer
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on:
push:
branches:
@@ -13,9 +12,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run the fuzzer

View File

@@ -15,8 +15,6 @@ jobs:
debian:
name: Publish debian packagge
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
needs: check-version
container:
@@ -27,7 +25,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3

View File

@@ -35,8 +35,6 @@ jobs:
publish-linux:
name: Publish binary for Linux
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
@@ -47,7 +45,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@@ -77,7 +78,10 @@ jobs:
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@@ -103,10 +107,12 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: Cargo build
uses: actions-rs/cargo@v1
with:
@@ -126,8 +132,6 @@ jobs:
name: Publish binary for aarch64
runs-on: ubuntu-latest
needs: check-version
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
@@ -150,10 +154,12 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: Configure target aarch64 GNU
## Environment variable is not passed using env:
## LD gold won't work with MUSL

View File

@@ -80,11 +80,10 @@ jobs:
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
push: true
platforms: linux/amd64,linux/arm64

View File

@@ -21,8 +21,6 @@ jobs:
test-linux:
name: Tests on ubuntu-18.04
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
@@ -33,7 +31,10 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run cargo check without any default features
@@ -58,7 +59,10 @@ jobs:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -73,8 +77,6 @@ jobs:
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
@@ -85,7 +87,10 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -95,8 +100,6 @@ jobs:
test-disabled-tokenization:
name: Test disabled tokenization
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
@@ -107,7 +110,10 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -121,8 +127,6 @@ jobs:
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@@ -133,7 +137,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run tests in debug
@@ -147,9 +154,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.75.0
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
@@ -164,10 +173,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: nightly-2024-06-25
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies

View File

@@ -18,9 +18,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file

View File

@@ -109,12 +109,6 @@ They are JSON files with the following structure (comments are not actually supp
"run_count": 3,
// List of arguments to add to the Meilisearch command line.
"extra_cli_args": ["--max-indexing-threads=1"],
// An expression that can be parsed as a comma-separated list of targets and levels
// as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
// The expression is used to filter the spans that are measured for profiling purposes.
// Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
// "search::=trace"
"target": "indexing::=trace",
// List of named assets that can be used in the commands.
"assets": {
// name of the asset.
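
For context on the `target` expression documented above, here is a minimal, illustrative Rust sketch (not part of this diff) of parsing such a filter with tracing_subscriber's `Targets` type; the crate dependency and the `main` wrapper are assumptions for the example only.

```rust
// Illustrative only: parse a workload `target` expression such as
// "indexing::=trace" (or "search::=trace") into a span/level filter.
use tracing_subscriber::filter::Targets;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Comma-separated entries are also accepted, e.g. "indexing::=trace,search::=trace".
    let filter: Targets = "indexing::=trace".parse()?;
    println!("parsed filter: {filter:?}");
    Ok(())
}
```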

Cargo.lock (generated, 148 changes)
View File

@@ -55,7 +55,7 @@ dependencies = [
"encoding_rs",
"flate2",
"futures-core",
"h2",
"h2 0.3.26",
"http 0.2.11",
"httparse",
"httpdate",
@@ -456,6 +456,12 @@ dependencies = [
"critical-section",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "autocfg"
version = "1.2.0"
@@ -1338,6 +1344,24 @@ dependencies = [
"syn 2.0.60",
]
[[package]]
name = "deadpool"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490"
dependencies = [
"async-trait",
"deadpool-runtime",
"num_cpus",
"tokio",
]
[[package]]
name = "deadpool-runtime"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
[[package]]
name = "debugid"
version = "0.8.0"
@@ -2213,6 +2237,25 @@ dependencies = [
"tracing",
]
[[package]]
name = "h2"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http 1.0.0",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "half"
version = "1.8.2"
@@ -2378,6 +2421,29 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-body"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643"
dependencies = [
"bytes",
"http 1.0.0",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.8.0"
@@ -2400,9 +2466,9 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-util",
"h2",
"h2 0.3.26",
"http 0.2.11",
"http-body",
"http-body 0.4.5",
"httparse",
"httpdate",
"itoa",
@@ -2414,6 +2480,27 @@ dependencies = [
"want",
]
[[package]]
name = "hyper"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2 0.4.5",
"http 1.0.0",
"http-body 1.0.0",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.24.1"
@@ -2422,12 +2509,27 @@ checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97"
dependencies = [
"futures-util",
"http 0.2.11",
"hyper",
"hyper 0.14.27",
"rustls 0.21.12",
"tokio",
"tokio-rustls",
]
[[package]]
name = "hyper-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56"
dependencies = [
"bytes",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"hyper 1.3.1",
"pin-project-lite",
"tokio",
]
[[package]]
name = "ident_case"
version = "1.0.1"
@@ -3352,6 +3454,7 @@ dependencies = [
"urlencoding",
"uuid",
"walkdir",
"wiremock",
"yaup",
"zip",
]
@@ -4348,10 +4451,10 @@ dependencies = [
"encoding_rs",
"futures-core",
"futures-util",
"h2",
"h2 0.3.26",
"http 0.2.11",
"http-body",
"hyper",
"http-body 0.4.5",
"hyper 0.14.27",
"hyper-rustls",
"ipnet",
"js-sys",
@@ -6031,6 +6134,30 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "wiremock"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81"
dependencies = [
"assert-json-diff",
"async-trait",
"base64 0.21.7",
"deadpool",
"futures",
"http 1.0.0",
"http-body-util",
"hyper 1.3.1",
"hyper-util",
"log",
"once_cell",
"regex",
"serde",
"serde_json",
"tokio",
"url",
]
[[package]]
name = "xattr"
version = "1.0.1"
@@ -6080,13 +6207,12 @@ dependencies = [
[[package]]
name = "yaup"
version = "0.3.1"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6"
checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9"
dependencies = [
"form_urlencoded",
"serde",
"thiserror",
"url",
]
[[package]]

View File

@@ -1,6 +1,9 @@
<p align="center">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo" target="_blank">
<img src="assets/meilisearch-logo-kawaii.png">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
</p>
@@ -22,7 +25,7 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -33,18 +36,11 @@
</a>
</p>
## 🖥 Examples
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -63,7 +59,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
## 🌍 Supercharge your Meilisearch experience
@@ -87,7 +83,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame
## 📊 Telemetry
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
@@ -109,11 +105,11 @@ Thank you for your support!
## 👩‍💻 Contributing
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
## 📦 Versioning
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).

Binary image file not shown (before: 98 KiB).

View File

@@ -1811,7 +1811,7 @@ mod tests {
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
autobatching_enabled: true,

View File

@@ -188,12 +188,6 @@ impl AuthFilter {
self.allow_index_creation && self.is_index_authorized(index)
}
#[inline]
/// Return true if a tenant token was used to generate the search rules.
pub fn is_tenant_token(&self) -> bool {
self.search_rules.is_some()
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
Self {
search_rules: None,
@@ -211,7 +205,6 @@ impl AuthFilter {
.unwrap_or(true)
}
/// Check if the index is authorized by the API key and the tenant token.
pub fn is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
&& self
@@ -221,44 +214,6 @@ impl AuthFilter {
.unwrap_or(true)
}
/// Only check if the index is authorized by the API key
pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
}
/// Only check if the index is authorized by the tenant token
pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
self.search_rules
.as_ref()
.map(|search_rules| search_rules.is_index_authorized(index))
.unwrap_or(true)
}
/// Return the list of authorized indexes by the tenant token if any
pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
match self.search_rules {
Some(ref search_rules) => {
let mut indexes: Vec<_> = match search_rules {
SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
None => Vec::new(),
}
}
/// Return the list of authorized indexes by the api key if any
pub fn api_key_list_index_authorized(&self) -> Vec<String> {
let mut indexes: Vec<_> = match self.key_authorized_indexes {
SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
if !self.is_index_authorized(index) {
return None;

View File

@@ -54,8 +54,6 @@ chinese-pinyin = ["milli/chinese-pinyin"]
hebrew = ["milli/hebrew"]
# japanese specialized tokenization
japanese = ["milli/japanese"]
# korean specialized tokenization
korean = ["milli/korean"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization

View File

@@ -98,6 +98,7 @@ tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
@@ -117,7 +118,8 @@ maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
yaup = "0.3.1"
wiremock = "0.6.0"
yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
@@ -150,7 +152,6 @@ chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]

View File

@@ -98,29 +98,14 @@ impl From<MeilisearchHttpError> for aweb::Error {
impl From<aweb::error::PayloadError> for MeilisearchHttpError {
fn from(error: aweb::error::PayloadError) -> Self {
match error {
aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
PayloadError::Payload(ActixPayloadError::IncompleteError),
),
_ => MeilisearchHttpError::Payload(PayloadError::Payload(
ActixPayloadError::OtherError(error),
)),
}
MeilisearchHttpError::Payload(PayloadError::Payload(error))
}
}
#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
#[error("The provided payload is incomplete and cannot be parsed")]
IncompleteError,
#[error(transparent)]
OtherError(aweb::error::PayloadError),
}
#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
#[error(transparent)]
Payload(ActixPayloadError),
Payload(aweb::error::PayloadError),
#[error(transparent)]
Json(JsonPayloadError),
#[error(transparent)]
@@ -137,15 +122,13 @@ impl ErrorCode for PayloadError {
fn error_code(&self) -> Code {
match self {
PayloadError::Payload(e) => match e {
ActixPayloadError::IncompleteError => Code::BadRequest,
ActixPayloadError::OtherError(error) => match error {
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
aweb::error::PayloadError::Incomplete(_) => Code::Internal,
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
PayloadError::Json(err) => match err {
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,

View File

@@ -12,8 +12,6 @@ use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};
use self::policies::AuthError;
pub struct GuardedData<P, D> {
data: D,
filters: AuthFilter,
@@ -37,12 +35,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? {
Ok(filters) => match data {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::InvalidToken.into()),
}
}
@@ -53,12 +51,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? {
Ok(filters) => match data {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
@@ -66,7 +64,7 @@ impl<P, D> GuardedData<P, D> {
auth: Data<AuthController>,
token: String,
index: Option<String>,
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
) -> Result<Option<AuthFilter>, ResponseError>
where
P: Policy + 'static,
{
@@ -129,14 +127,13 @@ pub trait Policy {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, policies::AuthError>;
) -> Option<AuthFilter>;
}
pub mod policies {
use actix_web::web::Data;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
use meilisearch_types::error::{Code, ErrorCode};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize};
@@ -147,53 +144,11 @@ pub mod policies {
enum TenantTokenOutcome {
NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules),
}
#[derive(thiserror::Error, Debug)]
pub enum AuthError {
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
ExpiredTenantToken { exp: i64, now: i64 },
#[error("The provided API key is invalid.")]
InvalidApiKey,
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error(
"The API key used to generate this tenant token cannot acces the index `{index}`."
)]
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
#[error(
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
)]
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error("The provided tenant token is invalid.")]
InvalidTenantToken,
#[error("Could not decode tenant token, {0}.")]
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
#[error("Invalid action `{0}`.")]
InternalInvalidAction(u8),
}
impl From<jsonwebtoken::errors::Error> for AuthError {
fn from(error: jsonwebtoken::errors::Error) -> Self {
use jsonwebtoken::errors::ErrorKind;
match error.kind() {
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
_ => AuthError::CouldNotDecodeTenantToken(error),
}
}
}
impl ErrorCode for AuthError {
fn error_code(&self) -> Code {
match self {
AuthError::InternalInvalidAction(_) => Code::Internal,
_ => Code::InvalidApiKey,
}
}
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
@@ -203,15 +158,15 @@ pub mod policies {
}
/// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
fn extract_key_id(token: &str) -> Option<Uuid> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
// get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims;
Ok(api_key_uid)
Some(api_key_uid)
}
fn is_keys_action(action: u8) -> bool {
@@ -232,102 +187,76 @@ pub mod policies {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, AuthError> {
) -> Option<AuthFilter> {
// authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Ok(AuthFilter::default());
return Some(AuthFilter::default());
}
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
(key_uuid, Some(search_rules))
}
Ok(TenantTokenOutcome::NotATenantToken)
| Err(AuthError::InvalidTenantToken) => (
auth.get_optional_uid_from_encoded_key(token.as_bytes())
.map_err(|_e| AuthError::InvalidApiKey)?
.ok_or(AuthError::InvalidApiKey)?,
None,
),
Err(e) => return Err(e),
TenantTokenOutcome::Expired => return None,
TenantTokenOutcome::Invalid => return None,
TenantTokenOutcome::NotATenantToken => {
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
}
};
// check that the indexes are allowed
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
let auth_filter = auth
.get_key_filters(key_uuid, search_rules)
.map_err(|_e| AuthError::InvalidApiKey)?;
// First check if the index is authorized in the tenant token, this is a public
// information, we can return a nice error message.
if let Some(index) = index {
if !auth_filter.tenant_token_is_index_authorized(index) {
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.tenant_token_list_index_authorized(),
});
}
if !auth_filter.api_key_is_index_authorized(index) {
if auth_filter.is_tenant_token() {
// If the error comes from a tenant token we cannot share the list
// of authorized indexes in the API key. This is not public information.
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
});
} else {
// Otherwise we can share the list
// of authorized indexes in the API key.
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.api_key_list_index_authorized(),
});
}
}
}
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
return Ok(auth_filter);
let action = Action::from_repr(A)?;
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
{
return Some(auth_filter);
}
Err(AuthError::InvalidApiKey)
None
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return Ok(TenantTokenOutcome::NotATenantToken);
return TenantTokenOutcome::NotATenantToken;
}
let uid = extract_key_id(token)?;
let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return Err(AuthError::InvalidTenantToken);
return TenantTokenOutcome::Invalid;
};
let data = decode::<Claims>(
let data = if let Ok(data) = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)?;
) {
data
} else {
return TenantTokenOutcome::Invalid;
};
// Check if token is expired.
if let Some(exp) = data.claims.exp {
let now = OffsetDateTime::now_utc().unix_timestamp();
if now > exp {
return Err(AuthError::ExpiredTenantToken { exp, now });
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return TenantTokenOutcome::Expired;
}
}
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
}
}
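
A note on the tenant-token handling above: `extract_key_id` reads a claim without verifying the token's signature. The hedged sketch below shows that pattern with the `jsonwebtoken` crate; the `Claims` struct and claim name are simplified assumptions rather than the exact types from this file.

```rust
// Illustrative only: read the key uid claim from a JWT without checking its
// signature, mirroring the `extract_key_id` approach shown in the diff.
use jsonwebtoken::{decode, DecodingKey, Validation};
use serde::Deserialize;
use uuid::Uuid;

#[derive(Deserialize)]
struct Claims {
    #[serde(rename = "apiKeyUid")] // assumed claim name
    api_key_uid: Uuid,
}

fn key_id(token: &str) -> Option<Uuid> {
    let mut validation = Validation::default();
    validation.validate_exp = false;
    validation.insecure_disable_signature_validation();
    // The secret is irrelevant because signature validation is disabled.
    let dummy_key = DecodingKey::from_secret(b"secret");
    let data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
    Some(data.claims.api_key_uid)
}
```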

View File

@@ -752,15 +752,10 @@ fn prepare_search<'t>(
SearchKind::SemanticOnly { embedder_name, embedder } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
None => embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?,
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
@@ -1336,23 +1331,13 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) =
extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
{
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn extract_geo_value(value: &Value) -> Option<f64> {
match value {
Value::Number(n) => n.as_f64(),
Value::String(s) => s.parse().ok(),
_ => None,
}
}
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
@@ -1726,54 +1711,4 @@ mod test {
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
#[test]
fn test_insert_geo_distance_with_coords_as_string() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": 3
}
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50,
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
}
}

View File

@@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": null,
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -119,8 +119,7 @@ async fn error_access_expired_key() {
thread::sleep(time::Duration::new(1, 0));
for (method, route) in AUTHORIZATIONS.keys() {
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -150,8 +149,7 @@ async fn error_access_unauthorized_index() {
// filter `products` index routes
.filter(|(_, route)| route.starts_with("/indexes/products"))
{
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -178,8 +176,7 @@ async fn error_access_unauthorized_action() {
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -283,7 +280,7 @@ async fn access_authorized_no_index_restriction() {
route,
action
);
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action);
assert_ne!(code, 403);
}
}
}

View File

@@ -1,10 +1,7 @@
use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;
use crate::common::{Server, Value};
use crate::common::Server;
use crate::json;
#[actix_rt::test]
@@ -439,262 +436,3 @@ async fn patch_api_keys_unknown_field() {
}
"###);
}
async fn send_request_with_custom_auth(
app: impl actix_web::dev::Service<
actix_http::Request,
Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
Error = actix_web::Error,
>,
url: &str,
auth: &str,
) -> (Value, StatusCode) {
let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
(response, status_code)
}
#[actix_rt::test]
async fn invalid_auth_format() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_api_key() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_tenant_token() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
// The tenant token won't be recognized at all if we're not on a search route
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// The error messages are not ideal but that's expected since we cannot _yet_ use deserr
let claims = json!({ "searchRules": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, InvalidSignature.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// ~~ For the next tests we first need a valid API key
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(key.as_bytes()),
)
.unwrap();
// Try to access an index that is not authorized by the tenant token
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}

View File

@@ -53,8 +53,7 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({
"message": null,
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -192,9 +191,7 @@ macro_rules! compute_forbidden_search {
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({}), |mut response, code| {
// We don't assert anything on the message since it may change between cases
response["message"] = serde_json::json!(null);
.search(json!({}), |response, code| {
assert_eq!(
response,
INVALID_RESPONSE.clone(),
@@ -498,8 +495,7 @@ async fn error_access_forbidden_routes() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
if !actions.contains("search") {
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -533,16 +529,14 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -591,8 +585,7 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -109,11 +109,9 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
fn invalid_response(query_index: Option<usize>) -> Value {
let message = if let Some(query_index) = query_index {
json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid."))
format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
} else {
// if it's anything else we simply return null and will tests all the
// error messages somewhere else
json!(null)
"The provided API key is invalid.".to_string()
};
json!({"message": message,
"code": "invalid_api_key",
@@ -416,10 +414,7 @@ macro_rules! compute_forbidden_single_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -474,13 +469,10 @@ macro_rules! compute_forbidden_multiple_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [
let (response, code) = server.multi_search(json!({"queries" : [
{"indexUid": "sales"},
{"indexUid": "products"},
]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -1081,20 +1073,18 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_ne!(response, invalid_response(None));
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}
@@ -1144,9 +1134,8 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) =
let (response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}

View File

@@ -185,7 +185,7 @@ impl Index<'_> {
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
write!(url, "?{}", yaup::to_string(&options).unwrap()).unwrap();
}
self.service.get(url).await
}
@@ -202,7 +202,7 @@ impl Index<'_> {
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents{}",
"/indexes/{}/documents?{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
@@ -365,7 +365,7 @@ impl Index<'_> {
}
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -402,7 +402,7 @@ impl Index<'_> {
}
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -427,11 +427,8 @@ impl Index<'_> {
#[derive(Debug, Default, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetAllDocumentsOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
pub fields: Option<Vec<&'static str>>,
}
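A minimal sketch of how these options end up in a request URL, assuming `yaup::to_string` emits only the `key=value` pairs so the `?` must come from the format string (as in one of the two variants above); `urlencode` and the struct are the helpers defined in this file:

let options = GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() };
// composes something like "/indexes/movies/documents?retrieveVectors=true";
// the exact output depends on the yaup version in use
let url = format!("/indexes/{}/documents?{}", urlencode("movies"), yaup::to_string(&options).unwrap());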

View File

@@ -42,12 +42,6 @@ impl std::ops::Deref for Value {
}
}
impl std::ops::DerefMut for Value {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl PartialEq<serde_json::Value> for Value {
fn eq(&self, other: &serde_json::Value) -> bool {
&self.0 == other
@@ -71,7 +65,14 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
json_string!(self, {
".enqueuedAt" => "[date]",
".startedAt" => "[date]",
".finishedAt" => "[date]",
".duration" => "[duration]",
".processingTimeMs" => "[duration]",
".details.embedders.*.url" => "[url]"
})
)
}
}

View File

@@ -183,58 +183,6 @@ async fn add_single_document_gzip_encoded() {
}
"###);
}
#[actix_rt::test]
async fn add_single_document_gzip_encoded_with_incomplete_error() {
let document = json!("kefir");
// this is what is expected and should work
let server = Server::new().await;
let app = server.init_web_app().await;
// post
let document = serde_json::to_string(&document).unwrap();
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// put
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
/// Here we try document request with every encoding
#[actix_rt::test]
@@ -1092,52 +1040,6 @@ async fn document_addition_with_primary_key() {
"###);
}
#[actix_rt::test]
async fn document_addition_with_huge_int_primary_key() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"primary": 14630868576586246730u64,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, Some("primary")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response,
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_document(14630868576586246730u64, None).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response),
@r###"
{
"primary": 14630868576586246730,
"content": "foo"
}
"###);
}
#[actix_rt::test]
async fn replace_document() {
let server = Server::new().await;

View File

@@ -719,7 +719,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found null",
"code": "bad_request",
@@ -730,7 +730,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset",
@@ -741,7 +741,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit",
@@ -752,7 +752,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields",
@@ -763,7 +763,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
@@ -774,7 +774,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
@@ -786,7 +786,7 @@ async fn fetch_document_by_filter() {
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
@@ -803,7 +803,7 @@ async fn retrieve_vectors() {
// GET ALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -812,7 +812,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -824,7 +824,7 @@ async fn retrieve_vectors() {
// FETCH ALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
"code": "invalid_document_retrieve_vectors",
@@ -833,7 +833,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -844,7 +844,7 @@ async fn retrieve_vectors() {
// GET A SINGLE DOCUMENT
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -853,7 +853,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",

View File

@@ -71,7 +71,7 @@ async fn search_bad_offset() {
}
"###);
let (response, code) = index.search_get("?offset=doggo").await;
let (response, code) = index.search_get("offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -99,7 +99,7 @@ async fn search_bad_limit() {
}
"###);
let (response, code) = index.search_get("?limit=doggo").await;
let (response, code) = index.search_get("limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -127,7 +127,7 @@ async fn search_bad_page() {
}
"###);
let (response, code) = index.search_get("?page=doggo").await;
let (response, code) = index.search_get("page=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() {
}
"###);
let (response, code) = index.search_get("?hitsPerPage=doggo").await;
let (response, code) = index.search_get("hitsPerPage=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -212,7 +212,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("?retrieveVectors=").await;
let (response, code) = index.search_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -223,7 +223,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("?retrieveVectors=doggo").await;
let (response, code) = index.search_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -269,7 +269,7 @@ async fn search_bad_crop_length() {
}
"###);
let (response, code) = index.search_get("?cropLength=doggo").await;
let (response, code) = index.search_get("cropLength=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -359,7 +359,7 @@ async fn search_bad_show_matches_position() {
}
"###);
let (response, code) = index.search_get("?showMatchesPosition=doggo").await;
let (response, code) = index.search_get("showMatchesPosition=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -442,7 +442,7 @@ async fn search_non_filterable_facets() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -472,7 +472,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -502,7 +502,7 @@ async fn search_non_filterable_facets_no_filterable() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -532,7 +532,7 @@ async fn search_non_filterable_facets_multiple_facets() {
}
"###);
let (response, code) = index.search_get("?facets=doggo,neko").await;
let (response, code) = index.search_get("facets=doggo,neko").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -625,7 +625,7 @@ async fn search_bad_matching_strategy() {
}
"###);
let (response, code) = index.search_get("?matchingStrategy=doggo").await;
let (response, code) = index.search_get("matchingStrategy=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -150,8 +150,7 @@ async fn bug_4640() {
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
},
"_geoDistance": 0
}
},
{
"id": 1,

View File

@@ -150,35 +150,6 @@ async fn simple_search() {
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn limit_offset() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"1");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn simple_search_hf() {
let server = Server::new().await;

View File

@@ -241,7 +241,7 @@ async fn similar_bad_offset() {
}
"###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -283,7 +283,7 @@ async fn similar_bad_limit() {
}
"###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -785,7 +785,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=").await;
let (response, code) = index.similar_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -796,7 +796,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=doggo").await;
let (response, code) = index.similar_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -2,7 +2,6 @@ mod errors;
mod webhook;
use meili_snap::insta::assert_json_snapshot;
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -739,9 +738,11 @@ async fn test_summarized_index_creation() {
async fn test_summarized_index_deletion() {
let server = Server::new().await;
let index = server.index("test");
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.delete().await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 0,
@@ -766,34 +767,12 @@ async fn test_summarized_index_deletion() {
"###);
// Are the details correctly set when documents are actually deleted?
// /!\ We need to wait for the document addition to be processed; otherwise, if the test runs too slowly,
// both tasks may get autobatched and the deleted documents count will be wrong.
let (ret, _code) =
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 2,
@@ -813,25 +792,22 @@ async fn test_summarized_index_deletion() {
"###);
// What happens when you delete an index that doesn't exist.
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 3,
"uid": 2,
"indexUid": "test",
"status": "failed",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 0
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
"deletedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",

View File

@@ -1,3 +1,4 @@
mod rest;
mod settings;
use meili_snap::{json_string, snapshot};

View File

@@ -0,0 +1,339 @@
use crate::vector::GetAllDocumentsOptions;
use meili_snap::{json_string, snapshot};
use std::sync::atomic::{AtomicUsize, Ordering};
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::{Server, Value};
use crate::json;
static COUNTER: AtomicUsize = AtomicUsize::new(0);
async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
Mock::given(method("POST"))
.and(path("/"))
.respond_with(|_req: &Request| {
let cpt = COUNTER.fetch_add(1, Ordering::Relaxed);
ResponseTemplate::new(200).set_body_json(json!({ "data": vec![cpt; 3] }))
})
.mount(&mock_server)
.await;
let url = mock_server.uri();
let embedder_settings = json!({
"source": "rest",
"url": url,
"dimensions": 3,
"query": {},
});
(mock_server, embedder_settings)
}
#[actix_rt::test]
async fn dummy_testing_the_mock() {
let (mock, _setting) = create_mock().await;
let body = reqwest::get(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @"[0,0,0]");
let body = reqwest::get(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @"[1,1,1]");
let body = reqwest::get(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @"[2,2,2]");
let body = reqwest::get(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @"[3,3,3]");
let body = reqwest::get(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @"[4,4,4]");
}
async fn get_server_vector() -> Server {
let server = Server::new().await;
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
server
}
#[actix_rt::test]
async fn bad_settings() {
let (mock, _setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest`: Missing field `url` (note: this field is mandatory for source rest)",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": "kefir" }),
},
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.rest.url`: could not parse `kefir`: relative URL without a base",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri() }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 0,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]"
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with user error: was expected 'input' to be an object in query 'null'.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {} }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {}
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 0,\n 0,\n 0\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"pathToEmbeddings": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `embedding` not found in path `embedding` in response: `{\n \"data\": [\n 1,\n 1,\n 1\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "embeddingObject": ["data"] }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"rest": {
"source": "rest",
"url": "[url]",
"query": {},
"embeddingObject": [
"data"
]
}
}
},
"error": {
"message": "internal: Error while generating embeddings: runtime error: could not determine model dimensions: test embedding failed with error: component `data` not found in path `data` in response: `{\n \"data\": [\n 2,\n 2,\n 2\n ]\n}`.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Validate an embedder with a bad dimension of 2 instead of 3
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "query": {}, "pathToEmbeddings": [], "embeddingObject": ["data"], "dimensions": 2 }),
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let (response, code) = index.add_documents(json!( { "id": 1, "name": "kefir" }), None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "An unexpected crash occurred when processing the task.",
"code": "internal",
"type": "internal",
"link": "https://docs.meilisearch.com/errors#internal"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn add_vector_and_user_provided() {
let (_mock, setting) = create_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @"");
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}

View File

@@ -1,6 +1,6 @@
[package]
name = "milli"
edition = "2021"
edition = "2018"
publish = false
version.workspace = true

View File

@@ -95,7 +95,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
/// `next_enriched_document` advances the document reader until all the documents have been read.
pub fn next_enriched_document(
&mut self,
) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> {
) -> Result<Option<EnrichedDocument>, DocumentsBatchCursorError> {
let document = self.documents.next_document()?;
let document_id = match self.external_ids.move_on_next()? {
Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,

View File

@@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@@ -64,7 +64,7 @@ impl DocumentsBatchIndex {
self.0.len()
}
pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> {
pub fn iter(&self) -> bimap::hash::Iter<FieldId, String> {
self.0.iter()
}
@@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {

View File

@@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
@@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
Some(s) => Ok(s.to_string()),
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
},
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
content => Err(UserError::InvalidDocumentId { document_id: content }),
}
}
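The guard change above determines whether very large unsigned ids are accepted; a small sketch of the difference using serde_json's number predicates (the huge value is the one used by the `document_addition_with_huge_int_primary_key` test earlier in this diff):

let huge = serde_json::json!(14630868576586246730u64); // greater than i64::MAX
if let serde_json::Value::Number(n) = huge {
    assert!(!n.is_i64()); // rejected by the `number.is_i64()` guard
    assert!(n.is_u64());
    assert!(!n.is_f64()); // still accepted by the `!number.is_f64()` guard
}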

View File

@@ -76,7 +76,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// `next_document` advances the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
@@ -108,7 +108,7 @@ impl From<serde_json::Error> for DocumentsBatchCursorError {
impl error::Error for DocumentsBatchCursorError {}
impl fmt::Display for DocumentsBatchCursorError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),
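A hedged usage sketch for `next_document` above, given a `DocumentsBatchCursor` named `cursor` and following the `obkv.iter()` pattern used by `obkv_to_object`; the `?` on `from_slice` relies on the `From<serde_json::Error>` impl shown in this same file:

while let Some(document) = cursor.next_document()? {
    for (field_id, value) in document.iter() {
        // each value is the raw serialized JSON for that field of the document
        let _field: serde_json::Value = serde_json::from_slice(value)?;
        let _ = field_id;
    }
}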

View File

@@ -56,7 +56,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
Ok(Ok(()))
}
fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a documents, or a sequence of documents.")
}
}

View File

@@ -24,21 +24,17 @@ impl ExternalDocumentsIds {
}
/// Returns `true` if hard and soft external documents lists are empty.
pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> {
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
self.0.is_empty(rtxn).map_err(Into::into)
}
pub fn get<A: AsRef<str>>(
&self,
rtxn: &RoTxn<'_>,
external_id: A,
) -> heed::Result<Option<u32>> {
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
self.0.get(rtxn, external_id.as_ref())
}
/// A helper function to debug this type; returns a `HashMap` of both
/// the soft and hard fst maps, combined.
pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> {
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
let mut map = HashMap::default();
for result in self.0.iter(rtxn)? {
let (external, internal) = result?;
@@ -55,11 +51,7 @@ impl ExternalDocumentsIds {
///
/// - If attempting to delete a document that doesn't exist
/// - If attempting to create a document that already exists
pub fn apply(
&self,
wtxn: &mut RwTxn<'_>,
operations: Vec<DocumentOperation>,
) -> heed::Result<()> {
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
for DocumentOperation { external_id, internal_id, kind } in operations {
match kind {
DocumentOperationKind::Create => {
@@ -77,7 +69,7 @@ impl ExternalDocumentsIds {
}
/// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> {
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
self.0.iter(rtxn)
}
}
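A hedged sketch of `apply` above; the concrete field types of `DocumentOperation` are assumed from how they are destructured, not from a definition shown in this diff:

let operations = vec![DocumentOperation {
    external_id: "doc-42".to_string(),   // assumed to be a String
    internal_id: 42,                     // assumed to be the u32 internal docid
    kind: DocumentOperationKind::Create,
}];
// panics if "doc-42" already exists, per the preconditions documented above
external_ids.apply(&mut wtxn, operations)?;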

View File

@@ -11,7 +11,7 @@ pub enum FacetType {
}
impl fmt::Display for FacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FacetType::String => f.write_str("string"),
FacetType::Number => f.write_str("number"),
@@ -37,7 +37,7 @@ impl FromStr for FacetType {
pub struct InvalidFacetType;
impl fmt::Display for InvalidFacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(r#"Invalid facet type, must be "string" or "number""#)
}
}

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());

View File

@@ -35,7 +35,7 @@ where
fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<'a, [u8]>, BoxedError> {
) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes

View File

@@ -24,7 +24,7 @@ impl<'a> BytesDecode<'a> for OrderedF64Codec {
impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16];
// write the globally ordered float
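The comment above refers to an order-preserving encoding of `f64`; a sketch of the classic transform, as an illustration of the idea rather than the exact bytes this codec writes:

fn globally_ordered(f: f64) -> u64 {
    let bits = f.to_bits();
    if bits >> 63 == 1 {
        !bits // negative floats: flip every bit
    } else {
        bits ^ (1 << 63) // non-negative floats: flip only the sign bit
    }
}
// the big-endian bytes of the result sort in the same order as the floats themselves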

View File

@@ -21,7 +21,7 @@ impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count);

View File

@@ -16,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for ObkvCodec {
impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
}
}

View File

@@ -42,7 +42,7 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
Ok(Cow::Owned(out))

View File

@@ -167,7 +167,7 @@ impl BytesDecodeOwned for CboRoaringBitmapCodec {
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Ok(Cow::Owned(vec))

View File

@@ -26,7 +26,7 @@ impl BytesDecodeOwned for RoaringBitmapCodec {
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes)?;
Ok(Cow::Owned(bytes))

View File

@@ -25,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
type EItem = (Script, Language);
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let script_name = script.name().as_bytes();
let lan_name = lan.name().as_bytes();

View File

@@ -30,7 +30,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
type EItem = (&'a str, u32);
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len());
@@ -66,7 +66,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());

View File

@@ -24,7 +24,7 @@ impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes());
@@ -51,7 +51,7 @@ impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1);

View File

@@ -287,12 +287,12 @@ impl Index {
}
/// Create a write transaction to be able to write into the index.
pub fn write_txn(&self) -> heed::Result<RwTxn<'_>> {
pub fn write_txn(&self) -> heed::Result<RwTxn> {
self.env.write_txn()
}
/// Create a read transaction to be able to read the index.
pub fn read_txn(&self) -> heed::Result<RoTxn<'_>> {
pub fn read_txn(&self) -> heed::Result<RoTxn> {
self.env.read_txn()
}
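A short usage sketch of the two transaction constructors above, paired with accessors defined later in this same file (crate-internal usage, since `put_documents_ids` is `pub(crate)`):

let rtxn = index.read_txn()?;
let ids = index.documents_ids(&rtxn)?; // RoaringBitmap of internal ids
assert_eq!(index.number_of_documents(&rtxn)?, ids.len());
drop(rtxn);

let mut wtxn = index.write_txn()?;
index.put_documents_ids(&mut wtxn, &ids)?;
wtxn.commit()?;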
@@ -344,7 +344,7 @@ impl Index {
/// Writes the documents ids that correspond to the user-ids-documents-ids FST.
pub(crate) fn put_documents_ids(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
@@ -355,7 +355,7 @@ impl Index {
}
/// Returns the internal documents ids.
pub fn documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
Ok(self
.main
.remap_types::<Str, RoaringBitmapCodec>()
@@ -364,7 +364,7 @@ impl Index {
}
/// Returns the number of documents indexed in the database.
pub fn number_of_documents(&self, rtxn: &RoTxn<'_>) -> Result<u64> {
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
let count = self
.main
.remap_types::<Str, RoaringBitmapLenCodec>()
@@ -375,22 +375,18 @@ impl Index {
/* primary key */
/// Writes the documents primary key, this is the field name that is used to store the id.
pub(crate) fn put_primary_key(
&self,
wtxn: &mut RwTxn<'_>,
primary_key: &str,
) -> heed::Result<()> {
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
}
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
}
/// Returns the documents primary key, `None` if it hasn't been defined.
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<&'t str>> {
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
}
@@ -408,7 +404,7 @@ impl Index {
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
pub(crate) fn put_fields_ids_map(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
map: &FieldsIdsMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
@@ -420,7 +416,7 @@ impl Index {
/// Returns the fields ids map which associate the documents keys with an internal field id
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
pub fn fields_ids_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldsIdsMap> {
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
Ok(self
.main
.remap_types::<Str, SerdeJson<FieldsIdsMap>>()
@@ -435,7 +431,7 @@ impl Index {
/// Writes the fieldids weights map which associates the field ids to their weights
pub(crate) fn put_fieldids_weights_map(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
map: &FieldidsWeightsMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
@@ -446,7 +442,7 @@ impl Index {
}
/// Get the fieldids weights map which associates the field ids to their weights
pub fn fieldids_weights_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldidsWeightsMap> {
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
self.main
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
@@ -459,13 +455,13 @@ impl Index {
}
/// Delete the fieldsids weights map
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
}
pub fn searchable_fields_and_weights<'a>(
&self,
rtxn: &'a RoTxn<'a>,
rtxn: &'a RoTxn,
) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
let fid_map = self.fields_ids_map(rtxn)?;
let weight_map = self.fieldids_weights_map(rtxn)?;
@@ -492,7 +488,7 @@ impl Index {
/// Writes the provided `rtree` which associates coordinates to documents ids.
pub(crate) fn put_geo_rtree(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
rtree: &RTree<GeoPoint>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
@@ -503,12 +499,12 @@ impl Index {
}
/// Delete the `rtree` which associates coordinates to documents ids.
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
}
/// Returns the `rtree` which associates coordinates to documents ids.
pub fn geo_rtree(&self, rtxn: &RoTxn<'_>) -> Result<Option<RTree<GeoPoint>>> {
pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> {
match self
.main
.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
@@ -524,7 +520,7 @@ impl Index {
/// Writes the documents ids that are faceted with a _geo field.
pub(crate) fn put_geo_faceted_documents_ids(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
@@ -535,15 +531,12 @@ impl Index {
}
/// Delete the documents ids that are faceted with a _geo field.
pub(crate) fn delete_geo_faceted_documents_ids(
&self,
wtxn: &mut RwTxn<'_>,
) -> heed::Result<bool> {
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
}
/// Retrieve all the documents ids that are faceted with a _geo field.
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
match self
.main
.remap_types::<Str, RoaringBitmapCodec>()
@@ -559,7 +552,7 @@ impl Index {
/// the number of times it occurs in the documents.
pub(crate) fn put_field_distribution(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
distribution: &FieldDistribution,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
@@ -571,7 +564,7 @@ impl Index {
/// Returns the field distribution which associates every field name with
/// the number of times it occurs in the documents.
pub fn field_distribution(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldDistribution> {
pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> {
Ok(self
.main
.remap_types::<Str, SerdeJson<FieldDistribution>>()
@@ -585,7 +578,7 @@ impl Index {
/// There must not be any duplicate field id.
pub(crate) fn put_displayed_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fields: &[&str],
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
@@ -597,20 +590,20 @@ impl Index {
/// Deletes the displayed fields ids; this will make the engine display
/// all the document attributes in the order of the `FieldsIdsMap`.
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
}
/// Returns the displayed fields in the order they were set by the user. If it returns
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Option<Vec<&'t str>>> {
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
}
/// Identical to `displayed_fields`, but returns the ids instead.
pub fn displayed_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Option<Vec<FieldId>>> {
pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
match self.displayed_fields(rtxn)? {
Some(fields) => {
let fields_ids_map = self.fields_ids_map(rtxn)?;
@@ -629,7 +622,7 @@ impl Index {
/* remove hidden fields */
pub fn remove_hidden_fields(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
fields: impl IntoIterator<Item = impl AsRef<str>>,
) -> Result<(BTreeSet<String>, bool)> {
let mut valid_fields =
@@ -651,7 +644,7 @@ impl Index {
/// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
user_fields: &[&str],
non_searchable_fields_ids: &[FieldId],
fields_ids_map: &FieldsIdsMap,
@@ -688,7 +681,7 @@ impl Index {
Ok(())
}
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
self.delete_fieldids_weights_map(wtxn)?;
@@ -696,7 +689,7 @@ impl Index {
}
/// Writes the searchable fields, when this list is specified, only these are indexed.
fn put_searchable_fields(&self, wtxn: &mut RwTxn<'_>, fields: &[&str]) -> heed::Result<()> {
fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
wtxn,
main_key::SEARCHABLE_FIELDS_KEY,
@@ -705,12 +698,12 @@ impl Index {
}
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
fn delete_searchable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Returns the searchable fields, those are the fields that are indexed,
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<Vec<Cow<'t, str>>> {
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
@@ -726,7 +719,7 @@ impl Index {
}
/// Identical to `searchable_fields`, but returns the ids instead.
pub fn searchable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<Vec<FieldId>> {
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
let fields = self.searchable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = Vec::new();
@@ -741,7 +734,7 @@ impl Index {
/// Writes the searchable fields, when this list is specified, only these are indexed.
pub(crate) fn put_user_defined_searchable_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fields: &[&str],
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<_>>().put(
@@ -754,7 +747,7 @@ impl Index {
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
pub(crate) fn delete_user_defined_searchable_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
}
@@ -762,7 +755,7 @@ impl Index {
/// Returns the user defined searchable fields.
pub fn user_defined_searchable_fields<'t>(
&self,
rtxn: &'t RoTxn<'t>,
rtxn: &'t RoTxn,
) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<_>>>()
@@ -770,10 +763,7 @@ impl Index {
}
/// Identical to `user_defined_searchable_fields`, but returns ids instead.
pub fn user_defined_searchable_fields_ids(
&self,
rtxn: &RoTxn<'_>,
) -> Result<Option<Vec<FieldId>>> {
pub fn user_defined_searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
match self.user_defined_searchable_fields(rtxn)? {
Some(fields) => {
let fields_ids_map = self.fields_ids_map(rtxn)?;
@@ -794,7 +784,7 @@ impl Index {
/// Writes the filterable fields names in the database.
pub(crate) fn put_filterable_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
@@ -805,12 +795,12 @@ impl Index {
}
/// Deletes the filterable fields ids in the database.
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
}
/// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
@@ -819,7 +809,7 @@ impl Index {
}
/// Identical to `filterable_fields`, but returns ids instead.
pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.filterable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
@@ -838,7 +828,7 @@ impl Index {
/// Writes the sortable fields names in the database.
pub(crate) fn put_sortable_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
@@ -849,12 +839,12 @@ impl Index {
}
/// Deletes the sortable fields ids in the database.
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
}
/// Returns the sortable fields names.
pub fn sortable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
@@ -863,7 +853,7 @@ impl Index {
}
/// Identical to `sortable_fields`, but returns ids instead.
pub fn sortable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
pub fn sortable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.sortable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
@@ -874,7 +864,7 @@ impl Index {
/// Writes the faceted fields in the database.
pub(crate) fn put_faceted_fields(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
@@ -885,7 +875,7 @@ impl Index {
}
/// Returns the faceted fields names.
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
@@ -894,7 +884,7 @@ impl Index {
}
/// Identical to `faceted_fields`, but returns ids instead.
pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.faceted_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
@@ -913,7 +903,7 @@ impl Index {
/// Returns the user defined faceted fields names.
///
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
let filterable_fields = self.filterable_fields(rtxn)?;
let sortable_fields = self.sortable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
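A hedged sketch of the union described in the doc comment above, as an illustration rather than the function's exact body:

let mut faceted: HashSet<String> = filterable_fields; // from the calls above
faceted.extend(sortable_fields);
if let Some(distinct) = distinct_field {
    faceted.insert(distinct.to_string());
}
// plus every field referenced by an Asc(..)/Desc(..) ranking rule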
@@ -934,7 +924,7 @@ impl Index {
}
/// Identical to `user_defined_faceted_fields`, but returns ids instead.
pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.user_defined_faceted_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
@@ -953,7 +943,7 @@ impl Index {
/// Retrieve all the documents which contain this field id set as null
pub fn null_faceted_documents_ids(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
@@ -965,7 +955,7 @@ impl Index {
/// Retrieve all the documents which contain this field id and whose value is considered empty
pub fn empty_faceted_documents_ids(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
@@ -977,7 +967,7 @@ impl Index {
/// Retrieve all the documents which contain this field id
pub fn exists_faceted_documents_ids(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_exists_docids.get(rtxn, &field_id)? {
@@ -990,17 +980,17 @@ impl Index {
pub(crate) fn put_distinct_field(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
distinct_field: &str,
) -> heed::Result<()> {
self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
}
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn<'_>) -> heed::Result<Option<&'a str>> {
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
}
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
}
@@ -1008,7 +998,7 @@ impl Index {
pub(crate) fn put_criteria(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
criteria: &[Criterion],
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
@@ -1018,11 +1008,11 @@ impl Index {
)
}
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
}
pub fn criteria(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<Criterion>> {
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
match self
.main
.remap_types::<Str, SerdeJson<Vec<Criterion>>>()
@@ -1038,7 +1028,7 @@ impl Index {
/// Writes the FST which is the words dictionary of the engine.
pub(crate) fn put_words_fst<A: AsRef<[u8]>>(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.remap_types::<Str, Bytes>().put(
@@ -1049,7 +1039,7 @@ impl Index {
}
/// Returns the FST which is the words dictionary of the engine.
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn<'_>) -> Result<fst::Set<Cow<'t, [u8]>>> {
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
@@ -1060,7 +1050,7 @@ impl Index {
pub(crate) fn put_stop_words<A: AsRef<[u8]>>(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.remap_types::<Str, Bytes>().put(
@@ -1070,11 +1060,11 @@ impl Index {
)
}
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
}
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<Option<fst::Set<&'t [u8]>>> {
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
None => Ok(None),
@@ -1085,7 +1075,7 @@ impl Index {
pub(crate) fn put_non_separator_tokens(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<_>>().put(
@@ -1095,11 +1085,11 @@ impl Index {
)
}
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
}
pub fn non_separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
@@ -1110,7 +1100,7 @@ impl Index {
pub(crate) fn put_separator_tokens(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<_>>().put(
@@ -1120,11 +1110,11 @@ impl Index {
)
}
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
}
pub fn separator_tokens(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
@@ -1133,7 +1123,7 @@ impl Index {
/* separators easing method */
pub fn allowed_separators(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
let default_separators =
charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
let mut separators: Option<BTreeSet<_>> = None;
@@ -1155,17 +1145,17 @@ impl Index {
pub(crate) fn put_dictionary(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
}
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
}
pub fn dictionary(&self, rtxn: &RoTxn<'_>) -> Result<Option<BTreeSet<String>>> {
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
@@ -1176,7 +1166,7 @@ impl Index {
pub(crate) fn put_synonyms(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
) -> heed::Result<()> {
@@ -1192,14 +1182,14 @@ impl Index {
)
}
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
}
pub fn user_defined_synonyms(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
) -> heed::Result<BTreeMap<String, Vec<String>>> {
Ok(self
.main
@@ -1208,10 +1198,7 @@ impl Index {
.unwrap_or_default())
}
pub fn synonyms(
&self,
rtxn: &RoTxn<'_>,
) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
Ok(self
.main
.remap_types::<Str, SerdeBincode<_>>()
@@ -1221,7 +1208,7 @@ impl Index {
pub fn words_synonyms<S: AsRef<str>>(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
words: &[S],
) -> heed::Result<Option<Vec<Vec<String>>>> {
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
@@ -1233,7 +1220,7 @@ impl Index {
/// Writes the FST which is the words prefixes dictionary of the engine.
pub(crate) fn put_words_prefixes_fst<A: AsRef<[u8]>>(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.remap_types::<Str, Bytes>().put(
@@ -1244,7 +1231,7 @@ impl Index {
}
/// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
@@ -1255,7 +1242,7 @@ impl Index {
/// Returns the number of document ids associated with the given word;
/// it is much faster than deserializing the bitmap and getting its length.
pub fn word_documents_count(&self, rtxn: &RoTxn<'_>, word: &str) -> heed::Result<Option<u64>> {
pub fn word_documents_count(&self, rtxn: &RoTxn, word: &str) -> heed::Result<Option<u64>> {
self.word_docids.remap_data_type::<RoaringBitmapLenCodec>().get(rtxn, word)
}
@@ -1264,7 +1251,7 @@ impl Index {
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
@@ -1279,7 +1266,7 @@ impl Index {
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
&self,
rtxn: &'t RoTxn<'t>,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
self.iter_documents(rtxn, ids)?.collect()
@@ -1288,14 +1275,14 @@ impl Index {
/// Returns an iterator over all the documents in the index.
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
rtxn: &'t RoTxn,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}
pub fn external_id_of<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl IntoIterator<Item = Result<String>> + 'a> {
let fields = self.fields_ids_map(rtxn)?;
@@ -1323,16 +1310,16 @@ impl Index {
}))
}
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> FacetDistribution<'a> {
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
FacetDistribution::new(rtxn, self)
}
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
Search::new(rtxn, self)
}
/// Returns the index creation time.
pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result<OffsetDateTime> {
pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self
.main
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
@@ -1344,7 +1331,7 @@ impl Index {
}
/// Returns the index last updated time.
pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result<OffsetDateTime> {
pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self
.main
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
@@ -1357,7 +1344,7 @@ impl Index {
pub(crate) fn set_updated_at(
&self,
wtxn: &mut RwTxn<'_>,
wtxn: &mut RwTxn,
time: &OffsetDateTime,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
@@ -1367,7 +1354,7 @@ impl Index {
)
}
pub fn authorize_typos(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1377,7 +1364,7 @@ impl Index {
}
}
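// Illustrative sketch (not part of this diff): the bool-as-u8 convention the comment
// above describes, with hypothetical helper names. false is stored as 0, any other
// byte reads back as true, and a missing value defaults to true.
fn encode_typo_flag(flag: bool) -> u8 {
    flag as u8
}
fn decode_typo_flag(stored: Option<u8>) -> bool {
    match stored {
        Some(0) => false, // 0 is the only value read as false
        Some(_) => true,  // any other byte counts as true
        None => true,     // absent value: typos are authorized by default
    }
}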
pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn<'_>, flag: bool) -> heed::Result<()> {
pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn, flag: bool) -> heed::Result<()> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1386,7 +1373,7 @@ impl Index {
Ok(())
}
pub fn min_word_len_one_typo(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
pub fn min_word_len_one_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1397,11 +1384,7 @@ impl Index {
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
}
pub(crate) fn put_min_word_len_one_typo(
&self,
txn: &mut RwTxn<'_>,
val: u8,
) -> heed::Result<()> {
pub(crate) fn put_min_word_len_one_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1409,7 +1392,7 @@ impl Index {
Ok(())
}
pub fn min_word_len_two_typos(&self, txn: &RoTxn<'_>) -> heed::Result<u8> {
pub fn min_word_len_two_typos(&self, txn: &RoTxn) -> heed::Result<u8> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1420,11 +1403,7 @@ impl Index {
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
}
pub(crate) fn put_min_word_len_two_typos(
&self,
txn: &mut RwTxn<'_>,
val: u8,
) -> heed::Result<()> {
pub(crate) fn put_min_word_len_two_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos.
@@ -1433,7 +1412,7 @@ impl Index {
}
/// Lists the words on which typos are not allowed
pub fn exact_words<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
None => Ok(None),
@@ -1442,7 +1421,7 @@ impl Index {
pub(crate) fn put_exact_words<A: AsRef<[u8]>>(
&self,
txn: &mut RwTxn<'_>,
txn: &mut RwTxn,
words: &fst::Set<A>,
) -> Result<()> {
self.main.remap_types::<Str, Bytes>().put(
@@ -1454,7 +1433,7 @@ impl Index {
}
/// Returns the exact attributes: attributes for which typos are disallowed.
pub fn exact_attributes<'t>(&self, txn: &'t RoTxn<'t>) -> Result<Vec<&'t str>> {
pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> {
Ok(self
.main
.remap_types::<Str, SerdeBincode<Vec<&str>>>()
@@ -1463,14 +1442,14 @@ impl Index {
}
/// Returns the list of exact attributes field ids.
pub fn exact_attributes_ids(&self, txn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result<HashSet<FieldId>> {
let attrs = self.exact_attributes(txn)?;
let fid_map = self.fields_ids_map(txn)?;
Ok(attrs.iter().filter_map(|attr| fid_map.id(attr)).collect())
}
/// Writes the exact attributes to the database.
pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn<'_>, attrs: &[&str]) -> Result<()> {
pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> {
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
txn,
main_key::EXACT_ATTRIBUTES,
@@ -1480,27 +1459,23 @@ impl Index {
}
/// Clears the exact attributes from the store.
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
}
pub fn max_values_per_facet(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
}
pub(crate) fn put_max_values_per_facet(
&self,
txn: &mut RwTxn<'_>,
val: u64,
) -> heed::Result<()> {
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> {
self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
}
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
}
pub fn sort_facet_values_by(&self, txn: &RoTxn<'_>) -> heed::Result<OrderByMap> {
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> {
let orders = self
.main
.remap_types::<Str, SerdeJson<OrderByMap>>()
@@ -1511,36 +1486,33 @@ impl Index {
pub(crate) fn put_sort_facet_values_by(
&self,
txn: &mut RwTxn<'_>,
txn: &mut RwTxn,
val: &OrderByMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
}
pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
}
pub fn pagination_max_total_hits(&self, txn: &RoTxn<'_>) -> heed::Result<Option<u64>> {
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
}
pub(crate) fn put_pagination_max_total_hits(
&self,
txn: &mut RwTxn<'_>,
txn: &mut RwTxn,
val: u64,
) -> heed::Result<()> {
self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
}
pub(crate) fn delete_pagination_max_total_hits(
&self,
txn: &mut RwTxn<'_>,
) -> heed::Result<bool> {
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
}
pub fn proximity_precision(&self, txn: &RoTxn<'_>) -> heed::Result<Option<ProximityPrecision>> {
pub fn proximity_precision(&self, txn: &RoTxn) -> heed::Result<Option<ProximityPrecision>> {
self.main
.remap_types::<Str, SerdeBincode<ProximityPrecision>>()
.get(txn, main_key::PROXIMITY_PRECISION)
@@ -1548,7 +1520,7 @@ impl Index {
pub(crate) fn put_proximity_precision(
&self,
txn: &mut RwTxn<'_>,
txn: &mut RwTxn,
val: ProximityPrecision,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
@@ -1558,7 +1530,7 @@ impl Index {
)
}
pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
}
@@ -1566,16 +1538,13 @@ impl Index {
/// Retrieve all the document ids that correspond to the (Script, Language) key, `None` if it is any.
pub fn script_language_documents_ids(
&self,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
key: &(Script, Language),
) -> heed::Result<Option<RoaringBitmap>> {
self.script_language_docids.get(rtxn, key)
}
pub fn script_language(
&self,
rtxn: &RoTxn<'_>,
) -> heed::Result<HashMap<Script, Vec<Language>>> {
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
let mut total = 0;
@@ -1636,7 +1605,7 @@ impl Index {
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
@@ -1794,7 +1763,7 @@ pub(crate) mod tests {
pub fn update_settings(
&self,
update: impl Fn(&mut Settings<'_, '_, '_>),
update: impl Fn(&mut Settings),
) -> Result<(), crate::error::Error> {
let mut wtxn = self.write_txn().unwrap();
self.update_settings_using_wtxn(&mut wtxn, update)?;
@@ -1804,7 +1773,7 @@ pub(crate) mod tests {
pub fn update_settings_using_wtxn<'t>(
&'t self,
wtxn: &mut RwTxn<'t>,
update: impl Fn(&mut Settings<'_, '_, '_>),
update: impl Fn(&mut Settings),
) -> Result<(), crate::error::Error> {
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
update(&mut builder);

View File

@@ -211,7 +211,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16<'_>,
obkv: obkv::KvReaderU16,
) -> Result<Object> {
displayed_fields
.iter()
@@ -229,10 +229,7 @@ pub fn obkv_to_json(
}
/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
pub fn all_obkv_to_json(obkv: obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
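// Illustrative sketch (not part of this diff): how these helpers might be combined to
// read a stored document back as JSON, assuming an open `index`, a read transaction
// `rtxn`, and a known `docid`, as with the Index methods shown earlier.
fn document_as_json(index: &Index, rtxn: &heed::RoTxn, docid: DocumentId) -> Result<Object> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let mut documents = index.documents(rtxn, Some(docid))?;
    let (_id, obkv) = documents.pop().expect("the requested document exists");
    // `all_obkv_to_json` walks every key of the obkv store and resolves field ids
    // back to field names through the fields ids map.
    all_obkv_to_json(obkv, &fields_ids_map)
}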

View File

@@ -47,7 +47,7 @@ pub struct FacetDistribution<'a> {
}
impl<'a> FacetDistribution<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> FacetDistribution<'a> {
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
FacetDistribution {
facets: None,
candidates: None,
@@ -374,7 +374,7 @@ impl<'a> FacetDistribution<'a> {
}
impl fmt::Debug for FacetDistribution<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FacetDistribution {
facets,
candidates,

View File

@@ -221,14 +221,14 @@ impl<'a> Filter<'a> {
}
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
self.inner_evaluate(rtxn, index, &filterable_fields, None)
}
fn evaluate_operator(
rtxn: &heed::RoTxn<'_>,
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
universe: Option<&RoaringBitmap>,
@@ -313,7 +313,7 @@ impl<'a> Filter<'a> {
/// Aggregates the document ids that are part of the specified range, automatically
/// going deeper through the levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn<'_>,
rtxn: &heed::RoTxn,
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId,
left: Bound<f64>,
@@ -338,7 +338,7 @@ impl<'a> Filter<'a> {
fn inner_evaluate(
&self,
rtxn: &heed::RoTxn<'_>,
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
universe: Option<&RoaringBitmap>,

View File

@@ -33,7 +33,7 @@ fn facet_extreme_value<'t>(
pub fn facet_min_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
rtxn: &'t heed::RoTxn,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
@@ -44,7 +44,7 @@ pub fn facet_min_value<'t>(
pub fn facet_max_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
rtxn: &'t heed::RoTxn,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
@@ -55,7 +55,7 @@ pub fn facet_max_value<'t>(
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn<'t>,
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
@@ -79,7 +79,7 @@ where
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn<'t>,
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>

View File

@@ -17,7 +17,6 @@ struct ScoreWithRatioResult {
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn compare_scores(
&(ref left_scores, left_ratio): &ScoreWithRatio,
&(ref right_scores, right_ratio): &ScoreWithRatio,
@@ -85,7 +84,6 @@ impl ScoreWithRatioResult {
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn merge(
vector_results: Self,
keyword_results: Self,
@@ -152,7 +150,6 @@ impl ScoreWithRatioResult {
}
impl<'a> Search<'a> {
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
// TODO: find a classier way to achieve this than resetting the vector and query params
// create separate keyword and semantic searches
@@ -181,25 +178,22 @@ impl<'a> Search<'a> {
// completely skip semantic search if the results of the keyword search are good enough
if self.results_good_enough(&keyword_results, semantic_ratio) {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
return Ok((keyword_results, Some(0)));
}
// no vector search against placeholder search
let Some(query) = search.query.take() else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
return Ok((keyword_results, Some(0)));
};
// no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
return Ok((keyword_results, Some(0)));
};
let vector_query = match vector {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();
match embedder.embed_one(query) {
Ok(embedding) => embedding,
Err(error) => {
@@ -245,44 +239,3 @@ impl<'a> Search<'a> {
true
}
}
fn return_keyword_results(
limit: usize,
offset: usize,
SearchResult {
matching_words,
candidates,
mut documents_ids,
mut document_scores,
degraded,
used_negative_operator,
}: SearchResult,
) -> (SearchResult, Option<u32>) {
let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
// technically redundant because documents_ids.len() == document_scores.len(),
// defensive programming
offset >= document_scores.len()
{
(vec![], vec![])
} else {
// PANICS: offset < len
documents_ids.rotate_left(offset);
documents_ids.truncate(limit);
// PANICS: offset < len
document_scores.rotate_left(offset);
document_scores.truncate(limit);
(documents_ids, document_scores)
};
(
SearchResult {
matching_words,
candidates,
documents_ids,
document_scores,
degraded,
used_negative_operator,
},
Some(0),
)
}
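// Illustrative sketch (not part of this diff): the offset/limit windowing used above,
// shown on a plain Vec. `rotate_left(offset)` moves the first `offset` items to the
// back, then `truncate(limit)` keeps the requested page; the function above checks
// offset < len before reaching this point.
fn window(mut ids: Vec<u32>, offset: usize, limit: usize) -> Vec<u32> {
    if offset >= ids.len() {
        return Vec::new();
    }
    ids.rotate_left(offset);
    ids.truncate(limit);
    ids
}
// window(vec![10, 11, 12, 13, 14], 2, 2) == vec![12, 13]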

View File

@@ -55,7 +55,7 @@ pub struct Search<'a> {
}
impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
Search {
query: None,
filter: None,
@@ -253,7 +253,7 @@ impl<'a> Search<'a> {
}
impl fmt::Debug for Search<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let Search {
query,
filter,
@@ -371,28 +371,4 @@ mod test {
assert_eq!(documents_ids, vec![1]);
}
#[cfg(feature = "korean")]
#[test]
fn test_hangul_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 1, "title": "김밥먹을래。" },
{ "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
]))
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
search.query("김밥");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
}
}

View File

@@ -213,6 +213,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
}
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
@@ -222,6 +225,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!();
continue;
};
drop(entered);
ranking_rule_scores.push(next_bucket.score);
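// Illustrative sketch (not part of this diff): the entered-span pattern added above,
// assuming the `tracing` crate. The guard returned by `enter()` keeps the span
// current until it is dropped, so dropping it explicitly closes the span before
// the bucket is processed.
fn timed_step() {
    let span = tracing::trace_span!("next_bucket");
    let entered = span.enter();
    // ... work that should be attributed to the span ...
    drop(entered); // leave the span before post-processing
}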

View File

@@ -47,7 +47,7 @@ pub struct DatabaseCache<'ctx> {
}
impl<'ctx> DatabaseCache<'ctx> {
fn get_value<'v, K1, KC, DC>(
txn: &'ctx RoTxn<'_>,
txn: &'ctx RoTxn,
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
@@ -77,7 +77,7 @@ impl<'ctx> DatabaseCache<'ctx> {
}
fn get_value_from_keys<'v, K1, KC, DC>(
txn: &'ctx RoTxn<'_>,
txn: &'ctx RoTxn,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
@@ -99,7 +99,7 @@ impl<'ctx> DatabaseCache<'ctx> {
.iter()
.filter_map(|key| db.get(txn, key).transpose())
.map(|v| v.map(Cow::Borrowed))
.collect::<std::result::Result<Vec<Cow<'_, [u8]>>, _>>()?;
.collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;
if bitmaps.is_empty() {
None

View File

@@ -23,7 +23,7 @@ pub struct DistinctOutput {
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
/// in the given candidates.
pub fn apply_distinct_rule(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
field_id: u16,
candidates: &RoaringBitmap,
) -> Result<DistinctOutput> {
@@ -42,7 +42,7 @@ pub fn apply_distinct_rule(
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
pub fn distinct_single_docid(
index: &Index,
txn: &RoTxn<'_>,
txn: &RoTxn,
field_id: u16,
docid: u32,
excluded: &mut RoaringBitmap,
@@ -72,7 +72,7 @@ pub fn distinct_single_docid(
/// Return all the docids containing the given value in the given field
fn facet_value_docids(
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn<'_>,
txn: &RoTxn,
field_id: u16,
facet_value: &[u8],
) -> heed::Result<Option<RoaringBitmap>> {
@@ -86,7 +86,7 @@ fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
@@ -104,7 +104,7 @@ pub fn facet_string_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
let key = facet_values_prefix_key(field_id, docid);

View File

@@ -27,7 +27,6 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
"exact_attribute".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -39,7 +38,6 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
@@ -53,7 +51,6 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
Ok(output)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,

View File

@@ -28,7 +28,7 @@ fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
@@ -109,7 +109,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
/// Drop the rtree if we don't need it anymore.
fn fill_buffer(
&mut self,
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
geo_candidates: &RoaringBitmap,
) -> Result<()> {
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
@@ -182,7 +182,7 @@ fn geo_value(
field_lat: u16,
field_lng: u16,
index: &Index,
rtxn: &RoTxn<'_>,
rtxn: &RoTxn,
) -> Result<[f64; 2]> {
let extract_geo = |geo_field: u16| -> Result<f64> {
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
@@ -209,7 +209,6 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
"geo_sort".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -235,7 +234,6 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
#[allow(clippy::only_used_in_recursion)]
fn next_bucket(
&mut self,
@@ -287,7 +285,6 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
self.next_bucket(ctx, logger, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
// we do not reset the rtree here, it could be used in a next iteration
self.query = None;

View File

@@ -127,8 +127,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
fn id(&self) -> String {
self.id.clone()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -211,7 +209,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -361,7 +358,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
@@ -375,7 +371,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
/// docids and the previous path docids is empty.
#[allow(clippy::too_many_arguments)]
fn visit_path_condition<G: RankingRuleGraphTrait>(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
graph: &mut RankingRuleGraph<G>,
universe: &RoaringBitmap,
dead_ends_cache: &mut DeadEndsCache<G::Condition>,

View File

@@ -20,13 +20,13 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn query_for_initial_universe(&mut self, _query: &Q);
/// Logs the ranking rules used to perform the search query
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]);
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]);
/// Logs the start of a ranking rule's iteration.
fn start_iteration_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_ranking_rule: &dyn RankingRule<Q>,
_query: &Q,
_universe: &RoaringBitmap,
) {
@@ -35,7 +35,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn next_bucket_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_ranking_rule: &dyn RankingRule<Q>,
_universe: &RoaringBitmap,
_candidates: &RoaringBitmap,
) {
@@ -44,7 +44,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn skip_bucket_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_ranking_rule: &dyn RankingRule<Q>,
_candidates: &RoaringBitmap,
) {
}
@@ -52,7 +52,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn end_iteration_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_ranking_rule: &dyn RankingRule<Q>,
_universe: &RoaringBitmap,
) {
}
@@ -73,7 +73,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
fn query_for_initial_universe(&mut self, _query: &Q) {}
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]) {}
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]) {}
fn add_to_results(&mut self, _docids: &[u32]) {}

View File

@@ -69,14 +69,14 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
fn initial_universe(&mut self, universe: &RoaringBitmap) {
self.initial_universe = Some(universe.clone());
}
fn ranking_rules(&mut self, rr: &[BoxRankingRule<'_, QueryGraph>]) {
fn ranking_rules(&mut self, rr: &[BoxRankingRule<QueryGraph>]) {
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
}
fn start_iteration_ranking_rule(
&mut self,
ranking_rule_idx: usize,
ranking_rule: &dyn RankingRule<'_, QueryGraph>,
ranking_rule: &dyn RankingRule<QueryGraph>,
_query: &QueryGraph,
universe: &RoaringBitmap,
) {
@@ -97,7 +97,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
fn next_bucket_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
_ranking_rule: &dyn RankingRule<QueryGraph>,
universe: &RoaringBitmap,
bucket: &RoaringBitmap,
) {
@@ -110,7 +110,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
fn skip_bucket_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
_ranking_rule: &dyn RankingRule<QueryGraph>,
bucket: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleSkipBucket {
@@ -122,7 +122,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
fn end_iteration_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
_ranking_rule: &dyn RankingRule<QueryGraph>,
_universe: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx });

View File

@@ -32,7 +32,7 @@ pub struct MatchingWords {
}
impl MatchingWords {
pub fn new(ctx: SearchContext<'_>, located_terms: Vec<LocatedQueryTerm>) -> Self {
pub fn new(ctx: SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
let mut phrases = Vec::new();
let mut words = Vec::new();
@@ -74,7 +74,7 @@ impl MatchingWords {
}
/// Try to match the token with one of the located_words.
fn match_unique_words<'a>(&'a self, token: &Token<'_>) -> Option<MatchType<'a>> {
fn match_unique_words<'a>(&'a self, token: &Token) -> Option<MatchType<'a>> {
for located_words in &self.words {
for word in &located_words.value {
let word = self.word_interner.get(*word);
@@ -166,7 +166,7 @@ impl<'a> PartialMatch<'a> {
/// - None if the given token breaks the partial match
/// - Partial if the given token matches the partial match but doesn't complete it
/// - Full if the given token completes the partial match
pub fn match_token(self, token: &Token<'_>) -> Option<MatchType<'a>> {
pub fn match_token(self, token: &Token) -> Option<MatchType<'a>> {
let Self { mut matching_words, ids, .. } = self;
let is_matching = match matching_words.first()? {
@@ -198,7 +198,7 @@ impl<'a> PartialMatch<'a> {
}
impl fmt::Debug for MatchingWords {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let MatchingWords { word_interner, phrase_interner, phrases, words } = self;
let phrases: Vec<_> = phrases

View File

@@ -123,7 +123,7 @@ impl<'t> Matcher<'t, '_> {
/// some words are counted as matches only if they are close together and in the right order;
/// compute_partial_match peeks into the next words to validate whether the match is complete.
fn compute_partial_match<'a>(
mut partial: PartialMatch<'a>,
mut partial: PartialMatch,
token_position: usize,
word_position: usize,
words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
@@ -244,12 +244,7 @@ impl<'t> Matcher<'t, '_> {
}
/// Returns the bounds in byte index of the crop window.
fn crop_bounds(
&self,
tokens: &[Token<'_>],
matches: &[Match],
crop_size: usize,
) -> (usize, usize) {
fn crop_bounds(&self, tokens: &[Token], matches: &[Match], crop_size: usize) -> (usize, usize) {
// if there is no match, we start from the beginning of the string by default.
let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0);
@@ -510,7 +505,7 @@ mod tests {
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(

View File

@@ -183,7 +183,7 @@ impl RestrictedFids {
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
fn resolve_maximally_reduced_query_graph(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
universe: &RoaringBitmap,
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
@@ -212,9 +212,9 @@ fn resolve_maximally_reduced_query_graph(
Ok(docids)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_universe(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
initial_universe: &RoaringBitmap,
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
@@ -229,9 +229,9 @@ fn resolve_universe(
)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_negative_words(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
negative_words: &[Word],
) -> Result<RoaringBitmap> {
let mut negative_bitmap = RoaringBitmap::new();
@@ -243,9 +243,9 @@ fn resolve_negative_words(
Ok(negative_bitmap)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_negative_phrases(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
negative_phrases: &[LocatedQueryTerm],
) -> Result<RoaringBitmap> {
let mut negative_bitmap = RoaringBitmap::new();
@@ -267,7 +267,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
let mut sort = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
match rr {
@@ -326,7 +326,7 @@ fn get_ranking_rules_for_vector<'ctx>(
let mut geo_sorted = false;
let mut vector = false;
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
@@ -406,7 +406,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<'ctx, QueryGraph>> = vec![];
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute
@@ -548,11 +548,11 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
filters: &Option<Filter<'_>>,
filters: &Option<Filter>,
) -> Result<RoaringBitmap> {
Ok(if let Some(filters) = filters {
filters.evaluate(txn, index)?
@@ -563,7 +563,7 @@ pub fn filtered_universe(
#[allow(clippy::too_many_arguments)]
pub fn execute_vector_search(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
vector: &[f32],
scoring_strategy: ScoringStrategy,
universe: RoaringBitmap,
@@ -620,9 +620,9 @@ pub fn execute_vector_search(
}
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "search::main")]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn execute_search(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
query: Option<&str>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
@@ -775,10 +775,7 @@ pub fn execute_search(
})
}
fn check_sort_criteria(
ctx: &SearchContext<'_>,
sort_criteria: Option<&Vec<AscDesc>>,
) -> Result<()> {
fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>) -> Result<()> {
let sort_criteria = if let Some(sort_criteria) = sort_criteria {
sort_criteria
} else {

View File

@@ -93,7 +93,7 @@ impl QueryGraph {
/// Build the query graph from the parsed user search query, return an updated list of the located query terms
/// which contains ngrams.
pub fn from_query(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
// The terms here must be consecutive
terms: &[LocatedQueryTerm],
) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
@@ -294,7 +294,7 @@ impl QueryGraph {
pub fn removal_order_for_terms_matching_strategy_frequency(
&self,
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
) -> Result<Vec<SmallBitmap<QueryNode>>> {
// lookup frequency for each term
let mut term_with_frequency: Vec<(u8, u64)> = {
@@ -337,7 +337,7 @@ impl QueryGraph {
pub fn removal_order_for_terms_matching_strategy_last(
&self,
ctx: &SearchContext<'_>,
ctx: &SearchContext,
) -> Vec<SmallBitmap<QueryNode>> {
let (first_term_idx, last_term_idx) = {
let mut first_term_idx = u8::MAX;
@@ -370,7 +370,7 @@ impl QueryGraph {
pub fn removal_order_for_terms_matching_strategy(
&self,
ctx: &SearchContext<'_>,
ctx: &SearchContext,
order: impl Fn(u8) -> u16,
) -> Vec<SmallBitmap<QueryNode>> {
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
@@ -398,7 +398,7 @@ impl QueryGraph {
}
/// Number of words in the phrases in this query graph
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext<'_>) -> usize {
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize {
let mut word_count = 0;
for (_, node) in self.nodes.iter() {
match &node.data {

View File

@@ -27,7 +27,7 @@ pub enum ZeroOrOneTypo {
}
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
@@ -48,7 +48,7 @@ impl Interned<QueryTerm> {
fn find_zero_typo_prefix_derivations(
word_interned: Interned<String>,
fst: fst::Set<Cow<'_, [u8]>>,
fst: fst::Set<Cow<[u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
@@ -71,7 +71,7 @@ fn find_zero_typo_prefix_derivations(
}
fn find_zero_one_typo_derivations(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
word_interned: Interned<String>,
is_prefix: bool,
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
@@ -114,7 +114,7 @@ fn find_zero_one_typo_derivations(
fn find_zero_one_two_typo_derivations(
word_interned: Interned<String>,
is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>,
fst: fst::Set<Cow<[u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>, NumberOfTypos) -> Result<ControlFlow<()>>,
) -> Result<()> {
@@ -172,7 +172,7 @@ fn find_zero_one_two_typo_derivations(
}
pub fn partially_initialized_term_from_word(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
word: &str,
max_typo: u8,
is_prefix: bool,
@@ -265,7 +265,7 @@ pub fn partially_initialized_term_from_word(
})
}
fn find_split_words(ctx: &mut SearchContext<'_>, word: &str) -> Result<Option<Interned<Phrase>>> {
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
if let Some((l, r)) = split_best_frequency(ctx, word)? {
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
} else {
@@ -274,7 +274,7 @@ fn find_split_words(ctx: &mut SearchContext<'_>, word: &str) -> Result<Option<In
}
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let allows_split_words = self_mut.allows_split_words();
@@ -340,7 +340,7 @@ impl Interned<QueryTerm> {
Ok(())
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm {
original,
@@ -406,7 +406,7 @@ impl Interned<QueryTerm> {
///
/// Return `None` if the original word cannot be split.
fn split_best_frequency(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
original: &str,
) -> Result<Option<(Interned<String>, Interned<String>)>> {
let chars = original.char_indices().skip(1);

View File

@@ -128,7 +128,7 @@ impl QueryTermSubset {
pub fn make_mandatory(&mut self) {
self.mandatory = true;
}
pub fn exact_term(&self, ctx: &SearchContext<'_>) -> Option<ExactTerm> {
pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
let full_query_term = ctx.term_interner.get(self.original);
if full_query_term.ngram_words.is_some() {
return None;
@@ -174,7 +174,7 @@ impl QueryTermSubset {
self.two_typo_subset.intersect(&other.two_typo_subset);
}
pub fn use_prefix_db(&self, ctx: &SearchContext<'_>) -> Option<Word> {
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
let original = ctx.term_interner.get(self.original);
let use_prefix_db = original.zero_typo.use_prefix_db?;
let word = match &self.zero_typo_subset {
@@ -198,7 +198,7 @@ impl QueryTermSubset {
}
pub fn all_single_words_except_prefix_db(
&self,
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
) -> Result<BTreeSet<Word>> {
let mut result = BTreeSet::default();
if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
@@ -290,7 +290,7 @@ impl QueryTermSubset {
Ok(result)
}
pub fn all_phrases(&self, ctx: &mut SearchContext<'_>) -> Result<BTreeSet<Interned<Phrase>>> {
pub fn all_phrases(&self, ctx: &mut SearchContext) -> Result<BTreeSet<Interned<Phrase>>> {
let mut result = BTreeSet::default();
if !self.one_typo_subset.is_empty() {
@@ -328,7 +328,7 @@ impl QueryTermSubset {
Ok(result)
}
pub fn original_phrase(&self, ctx: &SearchContext<'_>) -> Option<Interned<Phrase>> {
pub fn original_phrase(&self, ctx: &SearchContext) -> Option<Interned<Phrase>> {
let t = ctx.term_interner.get(self.original);
if let Some(p) = t.zero_typo.phrase {
if self.zero_typo_subset.contains_phrase(p) {
@@ -337,7 +337,7 @@ impl QueryTermSubset {
}
None
}
pub fn max_typo_cost(&self, ctx: &SearchContext<'_>) -> u8 {
pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
let t = ctx.term_interner.get(self.original);
match t.max_levenshtein_distance {
0 => {
@@ -368,7 +368,7 @@ impl QueryTermSubset {
_ => panic!(),
}
}
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext<'_>) {
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
if let Some(term) = self.exact_term(ctx) {
match term {
ExactTerm::Phrase(p) => {
@@ -399,7 +399,7 @@ impl QueryTermSubset {
pub fn clear_two_typo_subset(&mut self) {
self.two_typo_subset = NTypoTermSubset::Nothing;
}
pub fn description(&self, ctx: &SearchContext<'_>) -> String {
pub fn description(&self, ctx: &SearchContext) -> String {
let t = ctx.term_interner.get(self.original);
ctx.word_interner.get(t.original).to_owned()
}
@@ -446,7 +446,7 @@ impl QueryTerm {
impl Interned<QueryTerm> {
/// Return the original word from the given query term
fn original_single_word(self, ctx: &SearchContext<'_>) -> Option<Interned<String>> {
fn original_single_word(self, ctx: &SearchContext) -> Option<Interned<String>> {
let self_ = ctx.term_interner.get(self);
if self_.ngram_words.is_some() {
None
@@ -477,7 +477,7 @@ impl QueryTerm {
pub fn is_prefix(&self) -> bool {
self.is_prefix
}
pub fn original_word(&self, ctx: &SearchContext<'_>) -> String {
pub fn original_word(&self, ctx: &SearchContext) -> String {
ctx.word_interner.get(self.original).clone()
}

View File

@@ -23,8 +23,8 @@ pub struct ExtractedTokens {
/// Convert the tokenised search query into a list of located query terms.
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
pub fn located_query_terms_from_tokens(
ctx: &mut SearchContext<'_>,
query: NormalizedTokenIter<'_, '_>,
ctx: &mut SearchContext,
query: NormalizedTokenIter,
words_limit: Option<usize>,
) -> Result<ExtractedTokens> {
let nbr_typos = number_of_typos_allowed(ctx)?;
@@ -214,7 +214,7 @@ pub fn number_of_typos_allowed<'ctx>(
}
pub fn make_ngram(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
terms: &[LocatedQueryTerm],
number_of_typos_allowed: &impl Fn(&str) -> u8,
) -> Result<Option<LocatedQueryTerm>> {
@@ -297,12 +297,7 @@ impl PhraseBuilder {
}
// precondition: token has kind Word or StopWord
fn push_word(
&mut self,
ctx: &mut SearchContext<'_>,
token: &charabia::Token<'_>,
position: u16,
) {
fn push_word(&mut self, ctx: &mut SearchContext, token: &charabia::Token, position: u16) {
if self.is_empty() {
self.start = position;
}
@@ -316,7 +311,7 @@ impl PhraseBuilder {
}
}
fn build(self, ctx: &mut SearchContext<'_>) -> Option<LocatedQueryTerm> {
fn build(self, ctx: &mut SearchContext) -> Option<LocatedQueryTerm> {
if self.is_empty() {
return None;
}

View File

@@ -10,11 +10,11 @@ pub struct Phrase {
pub words: Vec<Option<Interned<String>>>,
}
impl Interned<Phrase> {
pub fn description(self, ctx: &SearchContext<'_>) -> String {
pub fn description(self, ctx: &SearchContext) -> String {
let p = ctx.phrase_interner.get(self);
p.words.iter().flatten().map(|w| ctx.word_interner.get(*w)).join(" ")
}
pub fn words(self, ctx: &SearchContext<'_>) -> Vec<Option<Interned<String>>> {
pub fn words(self, ctx: &SearchContext) -> Vec<Option<Interned<String>>> {
let p = ctx.phrase_interner.get(self);
p.words.clone()
}

View File

@@ -10,7 +10,7 @@ use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Build the ranking rule graph from the given query graph
pub fn build(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
query_graph: QueryGraph,
cost_of_ignoring_node: MappedInterner<QueryNode, Option<(u32, SmallBitmap<QueryNode>)>>,
) -> Result<Self> {

View File

@@ -117,7 +117,7 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> {
}
/// See module documentation
pub fn visit_paths(mut self, visit: VisitFn<'_, G>) -> Result<()> {
pub fn visit_paths(mut self, visit: VisitFn<G>) -> Result<()> {
let _ =
self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?;
Ok(())
@@ -132,8 +132,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
fn visit_node(
&mut self,
from_node: Interned<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
visit: VisitFn<G>,
ctx: &mut VisitorContext<G>,
) -> Result<ControlFlow<(), bool>> {
// any valid path will be found from this point
// if a valid path was found, then we know that the DeadEndsCache may have been updated,
@@ -189,8 +189,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
&mut self,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
visit: VisitFn<G>,
ctx: &mut VisitorContext<G>,
) -> Result<ControlFlow<(), bool>> {
if !ctx
.all_costs_from_node
@@ -228,8 +228,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
condition: Interned<G::Condition>,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
visit: VisitFn<G>,
ctx: &mut VisitorContext<G>,
) -> Result<ControlFlow<(), bool>> {
assert!(dest_node != ctx.graph.query_graph.end_node);

View File

@@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
/// and inserted in the cache.
pub fn get_computed_condition<'s>(
&'s mut self,
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
interned_condition: Interned<G::Condition>,
graph: &mut RankingRuleGraph<G>,
universe: &RoaringBitmap,

View File

@@ -17,7 +17,7 @@ pub enum ExactnessCondition {
pub enum ExactnessGraph {}
fn compute_docids(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
dest_node: &LocatedQueryTermSubset,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {
@@ -44,9 +44,8 @@ fn compute_docids(
impl RankingRuleGraphTrait for ExactnessGraph {
type Condition = ExactnessCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -72,9 +71,8 @@ impl RankingRuleGraphTrait for ExactnessGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn build_edges(
_ctx: &mut SearchContext<'_>,
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
@@ -88,7 +86,6 @@ impl RankingRuleGraphTrait for ExactnessGraph {
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
}
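
The other recurring edit in these files adds or removes a #[tracing::instrument(...)] attribute on each ranking-rule method. A hedged sketch of what such an attribute does, using the tracing and tracing-subscriber crates with an illustrative function name:

use tracing::instrument;

// skip_all keeps the arguments out of the span's recorded fields;
// target groups the span under a custom target string for filtering.
#[instrument(level = "trace", skip_all, target = "search::example")]
fn resolve(universe_len: u64) -> u64 {
    tracing::trace!(universe_len, "resolving inside the instrumented span");
    universe_len / 2
}

fn main() {
    // Install a subscriber that actually records trace-level spans.
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();
    let _ = resolve(100);
}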

View File

@@ -20,9 +20,8 @@ pub enum FidGraph {}
impl RankingRuleGraphTrait for FidGraph {
type Condition = FidCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -45,9 +44,8 @@ impl RankingRuleGraphTrait for FidGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn build_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
@@ -103,7 +101,6 @@ impl RankingRuleGraphTrait for FidGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}

View File

@@ -99,14 +99,14 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
/// Compute the document ids associated with the given edge condition,
/// restricted to the given universe.
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition>;
/// Return the costs and conditions of the edges going from the source node to the destination node
fn build_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
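
The trait hunk above documents that resolve_condition computes the docids of an edge condition restricted to the given universe. A hedged sketch of that restriction step with the roaring crate (the function name is illustrative, not the crate's own):

use roaring::RoaringBitmap;

// Restricting a condition's posting list to the current universe is a plain
// set intersection on RoaringBitmaps.
fn restrict_to_universe(condition_docids: &RoaringBitmap, universe: &RoaringBitmap) -> RoaringBitmap {
    condition_docids & universe
}

fn main() {
    let condition: RoaringBitmap = (0u32..10).collect();
    let universe: RoaringBitmap = (5u32..100).collect();
    let restricted = restrict_to_universe(&condition, &universe);
    assert_eq!(restricted.len(), 5); // document ids 5..10 survive
}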

View File

@@ -20,9 +20,8 @@ pub enum PositionGraph {}
impl RankingRuleGraphTrait for PositionGraph {
type Condition = PositionCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -45,9 +44,8 @@ impl RankingRuleGraphTrait for PositionGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn build_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
@@ -119,7 +117,6 @@ impl RankingRuleGraphTrait for PositionGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}

View File

@@ -8,7 +8,7 @@ use crate::search::new::SearchContext;
use crate::Result;
pub fn build_edges(
_ctx: &mut SearchContext<'_>,
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<ProximityCondition>,
left_term: Option<&LocatedQueryTermSubset>,
right_term: &LocatedQueryTermSubset,

View File

@@ -13,7 +13,7 @@ use crate::search::new::{SearchContext, Word};
use crate::Result;
pub fn compute_docids(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &ProximityCondition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -110,7 +110,7 @@ pub fn compute_docids(
}
fn compute_prefix_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
left_word: Interned<String>,
right_prefix: Interned<String>,
left_phrase: Option<Interned<Phrase>>,
@@ -166,7 +166,7 @@ fn compute_prefix_edges(
}
fn compute_non_prefix_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
word1: Interned<String>,
word2: Interned<String>,
left_phrase: Option<Interned<Phrase>>,
@@ -209,7 +209,7 @@ fn compute_non_prefix_edges(
}
fn last_words_of_term_derivations(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
let mut result = BTreeSet::new();
@@ -228,7 +228,7 @@ fn last_words_of_term_derivations(
Ok(result)
}
fn first_word_of_term_iter(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Interned<String>, Option<Interned<Phrase>>)>> {
let mut result = BTreeSet::new();

View File

@@ -21,18 +21,16 @@ pub enum ProximityGraph {}
impl RankingRuleGraphTrait for ProximityGraph {
type Condition = ProximityCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
compute_docids::compute_docids(ctx, condition, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn build_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
source_term: Option<&LocatedQueryTermSubset>,
dest_term: &LocatedQueryTermSubset,
@@ -40,7 +38,6 @@ impl RankingRuleGraphTrait for ProximityGraph {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}

View File

@@ -19,9 +19,8 @@ pub enum TypoGraph {}
impl RankingRuleGraphTrait for TypoGraph {
type Condition = TypoCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -38,9 +37,8 @@ impl RankingRuleGraphTrait for TypoGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn build_edges(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
@@ -79,7 +77,6 @@ impl RankingRuleGraphTrait for TypoGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}

View File

@@ -18,9 +18,8 @@ pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
type Condition = WordsCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
@@ -37,9 +36,8 @@ impl RankingRuleGraphTrait for WordsGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn build_edges(
_ctx: &mut SearchContext<'_>,
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
@@ -47,7 +45,6 @@ impl RankingRuleGraphTrait for WordsGraph {
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
}

View File

@@ -30,7 +30,7 @@ impl<'ctx> SearchContext<'ctx> {
}
}
pub fn compute_query_term_subset_docids(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
term: &QueryTermSubset,
) -> Result<RoaringBitmap> {
let mut docids = RoaringBitmap::new();
@@ -53,7 +53,7 @@ pub fn compute_query_term_subset_docids(
}
pub fn compute_query_term_subset_docids_within_field_id(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
term: &QueryTermSubset,
fid: u16,
) -> Result<RoaringBitmap> {
@@ -86,7 +86,7 @@ pub fn compute_query_term_subset_docids_within_field_id(
}
pub fn compute_query_term_subset_docids_within_position(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
term: &QueryTermSubset,
position: u16,
) -> Result<RoaringBitmap> {
@@ -121,7 +121,7 @@ pub fn compute_query_term_subset_docids_within_position(
/// Returns the subset of the input universe that satisfies the constraints of the input query graph.
pub fn compute_query_graph_docids(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
q: &QueryGraph,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {
@@ -178,7 +178,7 @@ pub fn compute_query_graph_docids(
}
pub fn compute_phrase_docids(
ctx: &mut SearchContext<'_>,
ctx: &mut SearchContext,
phrase: Interned<Phrase>,
) -> Result<RoaringBitmap> {
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
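
compute_query_term_subset_docids and its field-id and position variants all return a RoaringBitmap built up from the docids of each derivation of the term. A hedged sketch of the accumulation pattern suggested by the signatures above, using the roaring crate (names are illustrative, not the crate-internal ones):

use roaring::RoaringBitmap;

// Union every derivation's posting list into a single docid set.
fn union_postings(postings: &[RoaringBitmap]) -> RoaringBitmap {
    let mut docids = RoaringBitmap::new();
    for posting in postings {
        docids |= posting; // in-place union
    }
    docids
}

fn main() {
    let exact: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let prefix: RoaringBitmap = [3u32, 4].into_iter().collect();
    assert_eq!(union_postings(&[exact, prefix]).len(), 4);
}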

View File

@@ -56,7 +56,7 @@ pub struct Sort<'ctx, Query> {
impl<'ctx, Query> Sort<'ctx, Query> {
pub fn new(
index: &Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
rtxn: &'ctx heed::RoTxn,
field_name: String,
is_ascending: bool,
) -> Result<Self> {
@@ -74,7 +74,7 @@ impl<'ctx, Query> Sort<'ctx, Query> {
})
}
fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn<'ctx>, field_name: &str) -> Result<bool> {
fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn, field_name: &str) -> Result<bool> {
let Some(displayed_fields) = index.displayed_fields(rtxn)? else {
return Ok(false);
};
@@ -88,8 +88,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -97,7 +95,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
parent_candidates: &RoaringBitmap,
parent_query: &Query,
) -> Result<()> {
let iter: RankingRuleOutputIterWrapper<'ctx, Query> = match self.field_id {
let iter: RankingRuleOutputIterWrapper<Query> = match self.field_id {
Some(field_id) => {
let number_db = ctx
.index
@@ -188,7 +186,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
@@ -214,7 +211,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,

View File

@@ -207,7 +207,7 @@ fn create_index() -> TempIndex {
fn verify_distinct(
index: &Index,
txn: &RoTxn<'_>,
txn: &RoTxn,
distinct: Option<&str>,
docids: &[u32],
) -> Vec<String> {

Some files were not shown because too many files have changed in this diff.