Compare commits

..

11 Commits

Author SHA1 Message Date
Clément Renault
af2b722fed Make the CI happy about the never type 2024-09-16 11:40:26 +02:00
Clément Renault
8cb7001755 Expose an experimental parameter to control the generation of prefix dbs 2024-09-16 10:57:52 +02:00
meili-bors[bot]
882663bf7f Merge #4891
4891: Update version for the next release (v1.9.1) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-08-27 16:04:18 +00:00
dureuill
3234f63c00 Update version for the next release (v1.9.1) in Cargo.toml 2024-08-27 16:02:43 +00:00
meili-bors[bot]
9fff081043 Merge #4889
4889: When `retrieveVectors` is true, retrieve `_vectors.embedder` even if … r=Kerollmops a=dureuill

…there are no vector for that embedder


backports a bug fix from v1.10.0: 82647bcded

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-08-27 15:14:58 +00:00
Louis Dureuil
575b7b7a0b Fix tests 2024-08-27 17:14:10 +02:00
Louis Dureuil
6287f5b204 Remove unexecuted test 2024-08-27 16:54:33 +02:00
Louis Dureuil
5dac8e7168 Allow fuzzing cfg 2024-08-27 16:43:44 +02:00
Louis Dureuil
e669af1e49 CI: Add ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION workaround to keep using Ubuntu 18.04 2024-08-27 16:29:45 +02:00
Louis Dureuil
0e0e29459c Update time 2024-08-27 16:27:05 +02:00
Louis Dureuil
c25f7e3450 When retrieveVectors is true, retrieve _vectors.embedder even if there are no vector for that embedder 2024-08-27 16:26:41 +02:00
172 changed files with 2037 additions and 4661 deletions

View File

@@ -18,9 +18,11 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
run: |

View File

@@ -35,9 +35,11 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |

View File

@@ -12,9 +12,11 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -18,9 +18,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -13,9 +13,11 @@ jobs:
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Check for Command
id: command

View File

@@ -16,9 +16,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -15,9 +15,11 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name

View File

@@ -18,7 +18,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@@ -13,9 +13,11 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run the fuzzer

View File

@@ -27,7 +27,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3

View File

@@ -47,7 +47,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@@ -77,7 +80,10 @@ jobs:
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@@ -103,10 +109,12 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: Cargo build
uses: actions-rs/cargo@v1
with:
@@ -150,10 +158,12 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: Configure target aarch64 GNU
## Environment variable is not passed using env:
## LD gold won't work with MUSL

View File

@@ -80,11 +80,10 @@ jobs:
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
push: true
platforms: linux/amd64,linux/arm64

View File

@@ -33,7 +33,10 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: helix-editor/rust-toolchain@v1
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run cargo check without any default features
@@ -58,7 +61,10 @@ jobs:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -85,7 +91,10 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -107,7 +116,10 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -133,7 +145,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run tests in debug
@@ -147,9 +162,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.75.0
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
@@ -164,10 +181,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: nightly-2024-07-09
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies

View File

@@ -18,9 +18,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: helix-editor/rust-toolchain@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file

View File

@@ -109,12 +109,6 @@ They are JSON files with the following structure (comments are not actually supp
"run_count": 3,
// List of arguments to add to the Meilisearch command line.
"extra_cli_args": ["--max-indexing-threads=1"],
// An expression that can be parsed as a comma-separated list of targets and levels
// as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
// The expression is used to filter the spans that are measured for profiling purposes.
// Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
// "search::=trace"
"target": "indexing::=trace",
// List of named assets that can be used in the commands.
"assets": {
// name of the asset.

View File

@@ -52,16 +52,6 @@ cargo test
This command will be triggered to each PR as a requirement for merging it.
#### Faster build
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
It'll store some built artifacts in the directory of your choice.
We recommend using the standard `$HOME/.cache/lindera` directory:
```sh
export LINDERA_CACHE=$HOME/.cache/lindera
```
#### Snapshot-based tests
We are using [insta](https://insta.rs) to perform snapshot-based testing.
@@ -73,7 +63,7 @@ Furthermore, we provide some macros on top of insta, notably a way to use snapsh
To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshot are fully created locally:
```sh
```
export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
```

1556
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.9.0"
version = "1.9.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -1,7 +1,7 @@
# Compile
FROM rust:1.79.0-alpine3.20 AS compiler
FROM rust:1.75.0-alpine3.18 AS compiler
RUN apk add -q --no-cache build-base openssl-dev
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /
@@ -20,12 +20,13 @@ RUN set -eux; \
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.20
FROM alpine:3.16
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk add -q --no-cache libgcc tini curl
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.

View File

@@ -1,6 +1,9 @@
<p align="center">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo" target="_blank">
<img src="assets/meilisearch-logo-kawaii.png">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
</p>
@@ -22,7 +25,7 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -33,18 +36,11 @@
</a>
</p>
## 🖥 Examples
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -63,7 +59,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
## 🌍 Supercharge your Meilisearch experience
@@ -87,7 +83,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame
## 📊 Telemetry
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
@@ -109,11 +105,11 @@ Thank you for your support!
## 👩‍💻 Contributing
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
## 📦 Versioning
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.79"
csv = "1.3.0"
milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
mimalloc = { version = "0.1.39", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.6"
roaring = "0.10.2"
[build-dependencies]
anyhow = "1.0.86"
bytes = "1.6.0"
anyhow = "1.0.79"
bytes = "1.5.0"
convert_case = "0.6.0"
flate2 = "1.0.30"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.28"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.36", features = ["parsing"] }
time = { version = "0.3.34", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.86"
vergen-git2 = "1.0.0"
anyhow = "1.0.80"
vergen-git2 = "1.0.0-beta.2"

View File

@@ -11,21 +11,22 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
flate2 = "1.0.30"
http = "1.1.0"
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.5"
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
regex = "1.10.2"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -425,7 +425,7 @@ pub(crate) mod test {
let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -358,7 +358,7 @@ pub(crate) mod test {
let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -394,8 +394,8 @@ pub(crate) mod test {
let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -442,8 +442,8 @@ pub(crate) mod test {
let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -216,7 +216,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
@@ -337,7 +337,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
@@ -383,8 +383,8 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -463,8 +463,8 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -540,7 +540,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
@@ -633,7 +633,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
@@ -726,7 +726,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks

View File

@@ -252,7 +252,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -349,7 +349,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -267,7 +267,7 @@ pub(crate) mod test {
let mut dump = V3Reader::open(dir).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -262,8 +262,8 @@ pub(crate) mod test {
let mut dump = V4Reader::open(dir).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -299,8 +299,8 @@ pub(crate) mod test {
let mut dump = V5Reader::open(dir).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -281,7 +281,7 @@ pub(crate) mod test {
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
.
├---- indexes/
│ └---- doggos/

View File

@@ -11,7 +11,10 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.10.1"
thiserror = "1.0.61"
tempfile = "3.9.0"
thiserror = "1.0.56"
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.10"

View File

@@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.5"
unescaper = "0.1.3"
[dev-dependencies]
insta = "1.39.0"
insta = "1.34.0"

View File

@@ -564,121 +564,121 @@ pub mod tests {
#[test]
fn parse_escaped() {
insta::assert_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}
#[test]
fn parse() {
// Test equal
insta::assert_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
insta::assert_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
insta::assert_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
insta::assert_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
// Test IN
insta::assert_snapshot!(p("colour IN[]"), @"{colour} IN[]");
insta::assert_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
// Test IN + OR/AND/()
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
insta::assert_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
// Test whitespace start/end
insta::assert_snapshot!(p(" colour = green "), @"{colour} = {green}");
insta::assert_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
// Test conditions
insta::assert_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
insta::assert_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
insta::assert_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
insta::assert_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
// Test NOT
insta::assert_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test NULL + NOT NULL
insta::assert_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
// Test EMPTY + NOT EMPTY
insta::assert_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
// Test EXISTS + NOT EXITS
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
// Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
// Test geo radius
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
// Test geo bounding box
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
// Test OR + AND
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
insta::assert_snapshot!(
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
insta::assert_display_snapshot!(
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
);
// Test parentheses
insta::assert_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
insta::assert_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
// Test recursion
// This is the most that is allowed
insta::assert_snapshot!(
insta::assert_display_snapshot!(
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
@"{x} = {1}"
);
insta::assert_snapshot!(
insta::assert_display_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@"NOT ({x} = {1})"
);
// Confusing keywords
insta::assert_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
}
#[test]
@@ -689,182 +689,182 @@ pub mod tests {
Fc::parse(s).unwrap_err().to_string()
}
insta::assert_snapshot!(p("channel = Ponce = 12"), @r###"
insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
17:21 channel = Ponce = 12
"###);
insta::assert_snapshot!(p("channel = "), @r###"
insta::assert_display_snapshot!(p("channel = "), @r###"
Was expecting a value but instead got nothing.
14:14 channel =
"###);
insta::assert_snapshot!(p("channel = 🐻"), @r###"
insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻
"###);
insta::assert_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻 AND followers < 100
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
insta::assert_display_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
insta::assert_snapshot!(p("OR"), @r###"
insta::assert_display_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
insta::assert_snapshot!(p("_geoRadius"), @r###"
insta::assert_display_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14)
"###);
insta::assert_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14)
"###);
insta::assert_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14)
"###);
insta::assert_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14)
"###);
insta::assert_snapshot!(p("_geo(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14)
"###);
insta::assert_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14)
"###);
insta::assert_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14)
"###);
insta::assert_snapshot!(p("channel = 'ponce"), @r###"
insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
Expression `\'ponce` is missing the following closing delimiter: `'`.
11:17 channel = 'ponce
"###);
insta::assert_snapshot!(p("channel = \"ponce"), @r###"
insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
Expression `\"ponce` is missing the following closing delimiter: `"`.
11:17 channel = "ponce
"###);
insta::assert_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
17:35 channel = mv OR (followers >= 1000
"###);
insta::assert_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
34:35 channel = mv OR followers >= 1000)
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
17:44 channel = ponce ORdog != 'bernese mountain'
"###);
insta::assert_snapshot!(p("colour IN blue, green]"), @r###"
insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
Expected `[` after `IN` keyword.
11:23 colour IN blue, green]
"###);
insta::assert_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
32:36 colour IN [blue, green, 'blue' > 2]
"###);
insta::assert_snapshot!(p("colour IN [blue, green, AND]"), @r###"
insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
25:29 colour IN [blue, green, AND]
"###);
insta::assert_snapshot!(p("colour IN [blue, green"), @r###"
insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
Expected matching `]` after the list of field names given to `IN[`
23:23 colour IN [blue, green
"###);
insta::assert_snapshot!(p("colour IN ['blue, green"), @r###"
insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
Expression `\'blue, green` is missing the following closing delimiter: `'`.
12:24 colour IN ['blue, green
"###);
insta::assert_snapshot!(p("x = EXISTS"), @r###"
insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
5:11 x = EXISTS
"###);
insta::assert_snapshot!(p("AND = 8"), @r###"
insta::assert_display_snapshot!(p("AND = 8"), @r###"
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
1:4 AND = 8
"###);
insta::assert_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
"###);
insta::assert_snapshot!(
insta::assert_display_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
@@ -872,40 +872,40 @@ pub mod tests {
"###
);
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);

View File

@@ -12,9 +12,9 @@ license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.5.9", features = ["derive"] }
fastrand = "2.1.0"
clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.0.1"
milli = { path = "../milli" }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.10.1"
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.9.0"

View File

@@ -110,7 +110,7 @@ fn main() {
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
index.compressed_documents(&wtxn, res.documents_ids).unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort();

View File

@@ -11,38 +11,38 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.79"
bincode = "1.3.3"
csv = "1.3.0"
derive_builder = "0.20.0"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
enum-iterator = "1.5.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.28"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
page_size = "0.5.0"
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
ureq = "2.9.7"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.4.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] }
insta = { version = "1.34.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@@ -897,97 +897,95 @@ impl IndexScheduler {
dump_tasks.flush()?;
// 3. Dump the indexes
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let () =
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let dictionary = index.document_decompression_dictionary(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
let mut buffer = Vec::new();
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
// 3.1. Dump the documents
for ret in index.all_compressed_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, compressed) = ret?;
let doc = compressed.decompress_with_optional_dictionary(
&mut buffer,
dictionary.as_ref(),
)?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let (id, doc) = ret?;
let serde_json::Value::Object(vectors) = vectors else {
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
let mut document =
milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
},
value: vectors.clone(),
},
)
.into());
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate: !user_provided,
)
.into());
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
),
),
regenerate: !user_provided,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
index_dumper.push_document(&document)?;
}
index_dumper.push_document(&document)?;
}
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
meilisearch_types::settings::SecretPolicy::RevealSecrets,
)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
meilisearch_types::settings::SecretPolicy::RevealSecrets,
)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
// 4. Dump experimental feature settings
let features = self.features().runtime_features();
@@ -1294,7 +1292,11 @@ impl IndexScheduler {
}
}
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let config = IndexDocumentsConfig {
update_method: method,
compute_prefix_databases: self.compute_prefix_databases,
..Default::default()
};
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
@@ -1404,6 +1406,7 @@ impl IndexScheduler {
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.compute_prefix_databases,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
@@ -1644,6 +1647,7 @@ impl IndexScheduler {
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a>,
filter: &serde_json::Value,
compute_prefix_databases: bool,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
@@ -1659,6 +1663,7 @@ fn delete_document_by_filter<'a>(
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
compute_prefix_databases,
..Default::default()
};

View File

@@ -32,6 +32,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
compute_prefix_databases: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,

View File

@@ -276,6 +276,8 @@ pub struct IndexSchedulerOptions {
pub max_number_of_batched_tasks: usize,
/// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures,
/// An experimental option to control the generation of prefix databases.
pub compute_prefix_databases: bool,
}
/// Structure which holds meilisearch's indexes and schedules the tasks
@@ -283,19 +285,13 @@ pub struct IndexSchedulerOptions {
pub struct IndexScheduler {
/// The LMDB environment which the DBs are associated with.
pub(crate) env: Env,
/// A boolean that can be set to true to stop the currently processing tasks.
pub(crate) must_stop_processing: MustStopProcessing,
/// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
/// The list of files referenced by the tasks
pub(crate) file_store: FileStore,
// The main database, it contains all the tasks accessible by their Id.
pub(crate) file_store: FileStore, // The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
/// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database.
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
@@ -303,58 +299,43 @@ pub struct IndexScheduler {
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
/// Store the tasks associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper,
/// In charge of fetching and setting the status of experimental features.
features: features::FeatureData,
/// Get a signal when a batch needs to be processed.
pub(crate) wake_up: Arc<SignalEvent>,
/// Whether auto-batching is enabled or not.
pub(crate) autobatching_enabled: bool,
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,
/// The max number of tasks allowed before the scheduler starts to delete
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize,
/// Control wether we must generate the prefix databases or not.
pub(crate) compute_prefix_databases: bool,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
/// The path used to create the snapshots.
pub(crate) snapshots_path: PathBuf,
/// The path to the folder containing the auth LMDB env.
pub(crate) auth_path: PathBuf,
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
// ================= test
@@ -364,13 +345,11 @@ pub struct IndexScheduler {
/// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
#[cfg(test)]
test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
/// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
///
/// The first field is the iteration index and the second field identifies a location in the code.
#[cfg(test)]
planned_failures: Vec<(usize, tests::FailureLocation)>,
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
#[cfg(test)]
run_loop_iteration: Arc<RwLock<usize>>,
@@ -397,6 +376,7 @@ impl IndexScheduler {
cleanup_enabled: self.cleanup_enabled,
max_number_of_tasks: self.max_number_of_tasks,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
compute_prefix_databases: self.compute_prefix_databases,
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@@ -499,6 +479,7 @@ impl IndexScheduler {
cleanup_enabled: options.cleanup_enabled,
max_number_of_tasks: options.max_number_of_tasks,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
compute_prefix_databases: options.compute_prefix_databases,
dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
@@ -1811,7 +1792,7 @@ mod tests {
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
autobatching_enabled: true,
@@ -1819,6 +1800,7 @@ mod tests {
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: usize::MAX,
instance_features: Default::default(),
compute_prefix_databases: true,
};
configuration(&mut options);
@@ -2465,20 +2447,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2533,20 +2507,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2920,20 +2886,12 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2979,20 +2937,12 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3043,20 +2993,12 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3169,20 +3111,12 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3232,20 +3166,12 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3954,20 +3880,12 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4033,20 +3951,12 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4109,20 +4019,12 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4178,20 +4080,12 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4247,8 +4141,6 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4256,15 +4148,9 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4316,8 +4202,6 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4325,15 +4209,9 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4407,8 +4285,6 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4416,15 +4292,9 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4501,8 +4371,6 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"paw");
@@ -4510,15 +4378,9 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4588,8 +4450,6 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"doggoid");
@@ -4597,15 +4457,9 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -5248,8 +5102,6 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();
@@ -5269,12 +5121,8 @@ mod tests {
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
@@ -5328,8 +5176,6 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();
@@ -5352,12 +5198,8 @@ mod tests {
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
@@ -5449,20 +5291,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
@@ -5496,20 +5330,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
// the all the vectors linked to the new specified embedder have been removed
// Only the unknown embedders stays in the document DB
@@ -5612,15 +5438,9 @@ mod tests {
// the document with the id 3 should have its original embedding updated
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, Some(docid)).unwrap().remove(0);
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(&field_ids, &field_ids_map, doc).unwrap();
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
snapshot!(json_string!(doc), @r###"
{
"id": 3,
@@ -5732,20 +5552,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
@@ -5780,20 +5592,12 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();
@@ -5904,20 +5708,12 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
}
@@ -5947,20 +5743,12 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
}
@@ -5988,20 +5776,12 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_compressed_documents(&rtxn)
.all_documents(&rtxn)
.unwrap()
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
// FIXME: redaction

View File

@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.39.0", features = ["json", "redactions"] }
insta = { version = "^1.34.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"

View File

@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.22.1"
enum-iterator = "2.1.0"
base64 = "0.21.7"
enum-iterator = "1.5.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@@ -188,12 +188,6 @@ impl AuthFilter {
self.allow_index_creation && self.is_index_authorized(index)
}
#[inline]
/// Return true if a tenant token was used to generate the search rules.
pub fn is_tenant_token(&self) -> bool {
self.search_rules.is_some()
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
Self {
search_rules: None,
@@ -211,7 +205,6 @@ impl AuthFilter {
.unwrap_or(true)
}
/// Check if the index is authorized by the API key and the tenant token.
pub fn is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
&& self
@@ -221,44 +214,6 @@ impl AuthFilter {
.unwrap_or(true)
}
/// Only check if the index is authorized by the API key
pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
}
/// Only check if the index is authorized by the tenant token
pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
self.search_rules
.as_ref()
.map(|search_rules| search_rules.is_index_authorized(index))
.unwrap_or(true)
}
/// Return the list of authorized indexes by the tenant token if any
pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
match self.search_rules {
Some(ref search_rules) => {
let mut indexes: Vec<_> = match search_rules {
SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
None => Vec::new(),
}
}
/// Return the list of authorized indexes by the api key if any
pub fn api_key_list_index_authorized(&self) -> Vec<String> {
let mut indexes: Vec<_> = match self.key_authorized_indexes {
SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
};
indexes.sort_unstable();
indexes
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
if !self.is_index_authorized(index) {
return None;

View File

@@ -11,36 +11,36 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.8.0", default-features = false }
anyhow = "1.0.86"
actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.2", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.28"
fst = "0.4.7"
memmap2 = "0.9.4"
memmap2 = "0.7.1"
milli = { path = "../milli" }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.38"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.39.0"
insta = "1.34.0"
meili-snap = { path = "../meili-snap" }
[features]
@@ -54,8 +54,6 @@ chinese-pinyin = ["milli/chinese-pinyin"]
hebrew = ["milli/hebrew"]
# japanese specialized tokenization
japanese = ["milli/japanese"]
# korean specialized tokenization
korean = ["milli/korean"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization

View File

@@ -12,7 +12,7 @@ pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, zstd, Index};
pub use milli::{heed, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};

View File

@@ -14,123 +14,130 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.8.0", default-features = false, features = [
actix-http = { version = "3.7.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.8.0", default-features = false, features = [
actix-web = { version = "4.6.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
anyhow = { version = "1.0.86", features = ["backtrace"] }
async-trait = "0.1.81"
bstr = "1.9.1"
byte-unit = { version = "5.1.4", default-features = false, features = [
actix-web-static-files = { version = "4.0.1", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"byte",
"serde",
] }
bytes = "1.6.0"
clap = { version = "4.5.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.13"
deserr = { version = "0.6.2", features = ["actix-web"] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.13.0"
either = "1.9.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.28"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.2.6", features = ["serde"] }
is-terminal = "0.4.12"
itertools = "0.13.0"
jsonwebtoken = "9.3.0"
lazy_static = "1.5.0"
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
itertools = "0.11.0"
jsonwebtoken = "9.2.0"
lazy_static = "1.4.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mimalloc = { version = "0.1.39", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.2"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
ordered-float = "4.2.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.14"
pin-project-lite = "0.2.13"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.4", features = ["process"] }
prometheus = { version = "0.13.3", features = ["process"] }
rand = "0.8.5"
rayon = "1.10.0"
regex = "1.10.5"
reqwest = { version = "0.12.5", features = [
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls-pemfile = "1.0.4"
segment = { version = "0.2.4", optional = true }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.1"
siphasher = "1.0.0"
slice-group-by = "0.3.1"
static-files = { version = "0.2.4", optional = true }
sysinfo = "0.30.13"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.38.0", features = ["full"] }
toml = "0.8.14"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.2", features = ["serde"] }
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
tracing-actix-web = "0.7.10"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.10.0"
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "6.0.0"
insta = "1.39.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
yaup = "0.3.1"
yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.86", optional = true }
cargo_toml = { version = "0.20.3", optional = true }
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.5", features = [
reqwest = { version = "0.11.23", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.10.1", optional = true }
zip = { version = "2.1.3", default-features = false, features = ["deflate"], optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
zip = { version = "0.6.6", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",
@@ -144,7 +151,6 @@ chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]

View File

@@ -5,9 +5,10 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::InstanceUid;
@@ -255,6 +256,7 @@ struct Infos {
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
experimental_disable_prefix_db: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@@ -297,6 +299,7 @@ impl From<Opt> for Infos {
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
experimental_disable_prefix_db,
http_addr,
master_key: _,
env,
@@ -346,6 +349,7 @@ impl From<Opt> for Infos {
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_disable_prefix_db,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@@ -1,6 +1,6 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::{Byte, UnitType};
use byte_unit::Byte;
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@@ -33,7 +33,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
@@ -98,29 +98,14 @@ impl From<MeilisearchHttpError> for aweb::Error {
impl From<aweb::error::PayloadError> for MeilisearchHttpError {
fn from(error: aweb::error::PayloadError) -> Self {
match error {
aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
PayloadError::Payload(ActixPayloadError::IncompleteError),
),
_ => MeilisearchHttpError::Payload(PayloadError::Payload(
ActixPayloadError::OtherError(error),
)),
}
MeilisearchHttpError::Payload(PayloadError::Payload(error))
}
}
#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
#[error("The provided payload is incomplete and cannot be parsed")]
IncompleteError,
#[error(transparent)]
OtherError(aweb::error::PayloadError),
}
#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
#[error(transparent)]
Payload(ActixPayloadError),
Payload(aweb::error::PayloadError),
#[error(transparent)]
Json(JsonPayloadError),
#[error(transparent)]
@@ -137,15 +122,13 @@ impl ErrorCode for PayloadError {
fn error_code(&self) -> Code {
match self {
PayloadError::Payload(e) => match e {
ActixPayloadError::IncompleteError => Code::BadRequest,
ActixPayloadError::OtherError(error) => match error {
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
aweb::error::PayloadError::Incomplete(_) => Code::Internal,
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
PayloadError::Json(err) => match err {
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,

View File

@@ -12,8 +12,6 @@ use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};
use self::policies::AuthError;
pub struct GuardedData<P, D> {
data: D,
filters: AuthFilter,
@@ -37,12 +35,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? {
Ok(filters) => match data {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::InvalidToken.into()),
}
}
@@ -53,12 +51,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? {
Ok(filters) => match data {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
@@ -66,7 +64,7 @@ impl<P, D> GuardedData<P, D> {
auth: Data<AuthController>,
token: String,
index: Option<String>,
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
) -> Result<Option<AuthFilter>, ResponseError>
where
P: Policy + 'static,
{
@@ -129,14 +127,13 @@ pub trait Policy {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, policies::AuthError>;
) -> Option<AuthFilter>;
}
pub mod policies {
use actix_web::web::Data;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
use meilisearch_types::error::{Code, ErrorCode};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize};
@@ -147,53 +144,11 @@ pub mod policies {
enum TenantTokenOutcome {
NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules),
}
#[derive(thiserror::Error, Debug)]
pub enum AuthError {
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
ExpiredTenantToken { exp: i64, now: i64 },
#[error("The provided API key is invalid.")]
InvalidApiKey,
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error(
"The API key used to generate this tenant token cannot acces the index `{index}`."
)]
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
#[error(
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
)]
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error("The provided tenant token is invalid.")]
InvalidTenantToken,
#[error("Could not decode tenant token, {0}.")]
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
#[error("Invalid action `{0}`.")]
InternalInvalidAction(u8),
}
impl From<jsonwebtoken::errors::Error> for AuthError {
fn from(error: jsonwebtoken::errors::Error) -> Self {
use jsonwebtoken::errors::ErrorKind;
match error.kind() {
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
_ => AuthError::CouldNotDecodeTenantToken(error),
}
}
}
impl ErrorCode for AuthError {
fn error_code(&self) -> Code {
match self {
AuthError::InternalInvalidAction(_) => Code::Internal,
_ => Code::InvalidApiKey,
}
}
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
@@ -203,15 +158,15 @@ pub mod policies {
}
/// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
fn extract_key_id(token: &str) -> Option<Uuid> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
// get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims;
Ok(api_key_uid)
Some(api_key_uid)
}
fn is_keys_action(action: u8) -> bool {
@@ -232,102 +187,76 @@ pub mod policies {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, AuthError> {
) -> Option<AuthFilter> {
// authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Ok(AuthFilter::default());
return Some(AuthFilter::default());
}
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
(key_uuid, Some(search_rules))
}
Ok(TenantTokenOutcome::NotATenantToken)
| Err(AuthError::InvalidTenantToken) => (
auth.get_optional_uid_from_encoded_key(token.as_bytes())
.map_err(|_e| AuthError::InvalidApiKey)?
.ok_or(AuthError::InvalidApiKey)?,
None,
),
Err(e) => return Err(e),
TenantTokenOutcome::Expired => return None,
TenantTokenOutcome::Invalid => return None,
TenantTokenOutcome::NotATenantToken => {
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
}
};
// check that the indexes are allowed
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
let auth_filter = auth
.get_key_filters(key_uuid, search_rules)
.map_err(|_e| AuthError::InvalidApiKey)?;
// First check if the index is authorized in the tenant token, this is a public
// information, we can return a nice error message.
if let Some(index) = index {
if !auth_filter.tenant_token_is_index_authorized(index) {
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.tenant_token_list_index_authorized(),
});
}
if !auth_filter.api_key_is_index_authorized(index) {
if auth_filter.is_tenant_token() {
// If the error comes from a tenant token we cannot share the list
// of authorized indexes in the API key. This is not public information.
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
});
} else {
// Otherwise we can share the list
// of authorized indexes in the API key.
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.api_key_list_index_authorized(),
});
}
}
}
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
return Ok(auth_filter);
let action = Action::from_repr(A)?;
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
{
return Some(auth_filter);
}
Err(AuthError::InvalidApiKey)
None
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return Ok(TenantTokenOutcome::NotATenantToken);
return TenantTokenOutcome::NotATenantToken;
}
let uid = extract_key_id(token)?;
let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return Err(AuthError::InvalidTenantToken);
return TenantTokenOutcome::Invalid;
};
let data = decode::<Claims>(
let data = if let Ok(data) = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)?;
) {
data
} else {
return TenantTokenOutcome::Invalid;
};
// Check if token is expired.
if let Some(exp) = data.claims.exp {
let now = OffsetDateTime::now_utc().unix_timestamp();
if now > exp {
return Err(AuthError::ExpiredTenantToken { exp, now });
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return TenantTokenOutcome::Expired;
}
}
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
}
}

View File

@@ -15,7 +15,6 @@ use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::{self, available_parallelism};
use std::time::Duration;
@@ -24,13 +23,13 @@ use actix_cors::Cors;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
@@ -168,7 +167,7 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(USER_AGENT)
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
@@ -301,17 +300,18 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
compute_prefix_databases: !opt.experimental_disable_prefix_db,
})?)
};
@@ -477,7 +477,7 @@ pub fn configure_data(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(index_scheduler)
.app_data(auth)

View File

@@ -9,7 +9,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::{env, fmt, fs};
use byte_unit::{Byte, ParseError, UnitType};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig;
@@ -60,6 +60,7 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB: &str = "MEILI_EXPERIMENTAL_DISABLE_PREFIXDB";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -389,6 +390,11 @@ pub struct Opt {
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
/// Experimentally disable the prefix database, see: <https://github.com/orgs/meilisearch/discussions>
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB)]
#[serde(default)]
pub experimental_disable_prefix_db: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -489,6 +495,7 @@ impl Opt {
experimental_enable_logs_route,
experimental_replication_parameters,
experimental_reduce_indexing_memory_usage,
experimental_disable_prefix_db,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -518,6 +525,10 @@ impl Opt {
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB,
experimental_disable_prefix_db.to_string(),
);
if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
@@ -674,7 +685,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
@@ -688,25 +699,23 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ParseError;
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, Self::Err> {
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => {
write!(f, "{}", memory.get_appropriate_unit(UnitType::Binary))
}
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
@@ -846,11 +855,11 @@ fn default_env() -> String {
}
fn default_max_index_size() -> Byte {
Byte::from_u64(INDEX_SIZE)
Byte::from_bytes(INDEX_SIZE)
}
fn default_max_task_db_size() -> Byte {
Byte::from_u64(TASK_DB_SIZE)
Byte::from_bytes(TASK_DB_SIZE)
}
fn default_http_payload_size_limit() -> Byte {

View File

@@ -603,51 +603,42 @@ fn some_documents<'a, 't: 'a>(
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let dictionary = index.document_decompression_dictionary(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
let mut buffer = Vec::new();
Ok(index.iter_compressed_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(
|(key, compressed_document)| -> Result<_, ResponseError> {
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings)
.map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
}
Ok(document)
},
)
Ok(document)
})
}))
}

View File

@@ -752,15 +752,10 @@ fn prepare_search<'t>(
SearchKind::SemanticOnly { embedder_name, embedder } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
None => embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?,
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
@@ -1123,16 +1118,10 @@ fn make_hits(
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let decompression_dictionary = index.document_decompression_dictionary(rtxn)?;
let mut buffer = Vec::new();
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let obkv = compressed
.decompress_with_optional_dictionary(&mut buffer, decompression_dictionary.as_ref())
// TODO use a better error?
.map_err(|e| MeilisearchHttpError::HeedError(e.into()))?;
let documents_iter = index.documents(rtxn, documents_ids)?;
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
@@ -1156,8 +1145,6 @@ fn make_hits(
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
if retrieve_vectors == RetrieveVectors::Retrieve {
// Clippy is wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
@@ -1344,23 +1331,13 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) =
extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
{
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn extract_geo_value(value: &Value) -> Option<f64> {
match value {
Value::Number(n) => n.as_f64(),
Value::String(s) => s.parse().ok(),
_ => None,
}
}
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
@@ -1734,54 +1711,4 @@ mod test {
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
#[test]
fn test_insert_geo_distance_with_coords_as_string() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": 3
}
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50,
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
}
}

View File

@@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": null,
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -119,8 +119,7 @@ async fn error_access_expired_key() {
thread::sleep(time::Duration::new(1, 0));
for (method, route) in AUTHORIZATIONS.keys() {
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -150,8 +149,7 @@ async fn error_access_unauthorized_index() {
// filter `products` index routes
.filter(|(_, route)| route.starts_with("/indexes/products"))
{
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -178,8 +176,7 @@ async fn error_access_unauthorized_action() {
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -283,7 +280,7 @@ async fn access_authorized_no_index_restriction() {
route,
action
);
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action);
assert_ne!(code, 403);
}
}
}

View File

@@ -1,10 +1,7 @@
use actix_web::http::StatusCode;
use actix_web::test;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;
use crate::common::{Server, Value};
use crate::common::Server;
use crate::json;
#[actix_rt::test]
@@ -439,262 +436,3 @@ async fn patch_api_keys_unknown_field() {
}
"###);
}
async fn send_request_with_custom_auth(
app: impl actix_web::dev::Service<
actix_http::Request,
Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
Error = actix_web::Error,
>,
url: &str,
auth: &str,
) -> (Value, StatusCode) {
let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
(response, status_code)
}
#[actix_rt::test]
async fn invalid_auth_format() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_api_key() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_tenant_token() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
// The tenant token won't be recognized at all if we're not on a search route
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// The error messages are not ideal but that's expected since we cannot _yet_ use deserr
let claims = json!({ "searchRules": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, InvalidSignature.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// ~~ For the next tests we first need a valid API key
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(key.as_bytes()),
)
.unwrap();
// Try to access an index that is not authorized by the tenant token
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}

View File

@@ -53,8 +53,7 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({
"message": null,
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -192,9 +191,7 @@ macro_rules! compute_forbidden_search {
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({}), |mut response, code| {
// We don't assert anything on the message since it may change between cases
response["message"] = serde_json::json!(null);
.search(json!({}), |response, code| {
assert_eq!(
response,
INVALID_RESPONSE.clone(),
@@ -498,8 +495,7 @@ async fn error_access_forbidden_routes() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
if !actions.contains("search") {
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -533,16 +529,14 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -591,8 +585,7 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -109,11 +109,9 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
fn invalid_response(query_index: Option<usize>) -> Value {
let message = if let Some(query_index) = query_index {
json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid."))
format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
} else {
// if it's anything else we simply return null and will tests all the
// error messages somewhere else
json!(null)
"The provided API key is invalid.".to_string()
};
json!({"message": message,
"code": "invalid_api_key",
@@ -416,10 +414,7 @@ macro_rules! compute_forbidden_single_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -474,13 +469,10 @@ macro_rules! compute_forbidden_multiple_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (mut response, code) = server.multi_search(json!({"queries" : [
let (response, code) = server.multi_search(json!({"queries" : [
{"indexUid": "sales"},
{"indexUid": "products"},
]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -1081,20 +1073,18 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (mut response, code) = server
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_ne!(response, invalid_response(None));
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (mut response, code) = server
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}
@@ -1144,9 +1134,8 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (mut response, code) =
let (response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}

View File

@@ -185,7 +185,7 @@ impl Index<'_> {
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
write!(url, "?{}", yaup::to_string(&options).unwrap()).unwrap();
}
self.service.get(url).await
}
@@ -202,7 +202,7 @@ impl Index<'_> {
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents{}",
"/indexes/{}/documents?{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
@@ -365,7 +365,7 @@ impl Index<'_> {
}
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -402,7 +402,7 @@ impl Index<'_> {
}
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -427,11 +427,8 @@ impl Index<'_> {
#[derive(Debug, Default, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetAllDocumentsOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
pub fields: Option<Vec<&'static str>>,
}

View File

@@ -42,12 +42,6 @@ impl std::ops::Deref for Value {
}
}
impl std::ops::DerefMut for Value {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl PartialEq<serde_json::Value> for Value {
fn eq(&self, other: &serde_json::Value) -> bool {
&self.0 == other

View File

@@ -6,7 +6,7 @@ use std::time::Duration;
use actix_http::body::MessageBody;
use actix_web::dev::ServiceResponse;
use actix_web::http::StatusCode;
use byte_unit::{Byte, Unit};
use byte_unit::{Byte, ByteUnit};
use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
@@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
env: "development".to_owned(),
#[cfg(feature = "analytics")]
no_analytics: true,
max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),
http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(),
max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
snapshot_dir: ".".into(),
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.

View File

@@ -183,58 +183,6 @@ async fn add_single_document_gzip_encoded() {
}
"###);
}
#[actix_rt::test]
async fn add_single_document_gzip_encoded_with_incomplete_error() {
let document = json!("kefir");
// this is a what is expected and should work
let server = Server::new().await;
let app = server.init_web_app().await;
// post
let document = serde_json::to_string(&document).unwrap();
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// put
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
/// Here we try document request with every encoding
#[actix_rt::test]
@@ -1092,52 +1040,6 @@ async fn document_addition_with_primary_key() {
"###);
}
#[actix_rt::test]
async fn document_addition_with_huge_int_primary_key() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"primary": 14630868576586246730u64,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, Some("primary")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response,
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_document(14630868576586246730u64, None).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response),
@r###"
{
"primary": 14630868576586246730,
"content": "foo"
}
"###);
}
#[actix_rt::test]
async fn replace_document() {
let server = Server::new().await;
@@ -2274,7 +2176,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10 MiB.",
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"code": "payload_too_large",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large"

View File

@@ -719,7 +719,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found null",
"code": "bad_request",
@@ -730,7 +730,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset",
@@ -741,7 +741,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit",
@@ -752,7 +752,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields",
@@ -763,7 +763,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
@@ -774,7 +774,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
@@ -786,7 +786,7 @@ async fn fetch_document_by_filter() {
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
@@ -803,7 +803,7 @@ async fn retrieve_vectors() {
// GETALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -812,7 +812,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -824,7 +824,7 @@ async fn retrieve_vectors() {
// FETCHALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
"code": "invalid_document_retrieve_vectors",
@@ -833,7 +833,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -844,7 +844,7 @@ async fn retrieve_vectors() {
// GET A SINGLEDOCUMENT
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -853,7 +853,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(response, @r###"
snapshot!(json_string!(response), @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",

View File

@@ -1,5 +1,5 @@
use actix_web::http::header::ACCEPT_ENCODING;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use urlencoding::encode as urlencode;
@@ -644,7 +644,12 @@ async fn get_document_with_vectors() {
{
"id": 1,
"name": "echo",
"_vectors": {}
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
}
],
"offset": 0,
@@ -700,7 +705,12 @@ async fn get_document_with_vectors() {
},
{
"name": "echo",
"_vectors": {}
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
}
],
"offset": 0,

View File

@@ -1,5 +1,6 @@
use actix_web::http::header::{ContentType, ACCEPT_ENCODING};
use actix_web::http::header::ContentType;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;

View File

@@ -71,7 +71,7 @@ async fn search_bad_offset() {
}
"###);
let (response, code) = index.search_get("?offset=doggo").await;
let (response, code) = index.search_get("offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -99,7 +99,7 @@ async fn search_bad_limit() {
}
"###);
let (response, code) = index.search_get("?limit=doggo").await;
let (response, code) = index.search_get("limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -127,7 +127,7 @@ async fn search_bad_page() {
}
"###);
let (response, code) = index.search_get("?page=doggo").await;
let (response, code) = index.search_get("page=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() {
}
"###);
let (response, code) = index.search_get("?hitsPerPage=doggo").await;
let (response, code) = index.search_get("hitsPerPage=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -212,7 +212,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("?retrieveVectors=").await;
let (response, code) = index.search_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -223,7 +223,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("?retrieveVectors=doggo").await;
let (response, code) = index.search_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -269,7 +269,7 @@ async fn search_bad_crop_length() {
}
"###);
let (response, code) = index.search_get("?cropLength=doggo").await;
let (response, code) = index.search_get("cropLength=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -359,7 +359,7 @@ async fn search_bad_show_matches_position() {
}
"###);
let (response, code) = index.search_get("?showMatchesPosition=doggo").await;
let (response, code) = index.search_get("showMatchesPosition=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -442,7 +442,7 @@ async fn search_non_filterable_facets() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -472,7 +472,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -502,7 +502,7 @@ async fn search_non_filterable_facets_no_filterable() {
}
"###);
let (response, code) = index.search_get("?facets=doggo").await;
let (response, code) = index.search_get("facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -532,7 +532,7 @@ async fn search_non_filterable_facets_multiple_facets() {
}
"###);
let (response, code) = index.search_get("?facets=doggo,neko").await;
let (response, code) = index.search_get("facets=doggo,neko").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -625,7 +625,7 @@ async fn search_bad_matching_strategy() {
}
"###);
let (response, code) = index.search_get("?matchingStrategy=doggo").await;
let (response, code) = index.search_get("matchingStrategy=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -150,8 +150,7 @@ async fn bug_4640() {
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
},
"_geoDistance": 0
}
},
{
"id": 1,

View File

@@ -241,7 +241,7 @@ async fn similar_bad_offset() {
}
"###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -283,7 +283,7 @@ async fn similar_bad_limit() {
}
"###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -785,7 +785,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=").await;
let (response, code) = index.similar_get("retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -796,7 +796,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("?retrieveVectors=doggo").await;
let (response, code) = index.similar_get("retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -2,7 +2,6 @@ mod errors;
mod webhook;
use meili_snap::insta::assert_json_snapshot;
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -739,9 +738,11 @@ async fn test_summarized_index_creation() {
async fn test_summarized_index_deletion() {
let server = Server::new().await;
let index = server.index("test");
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.delete().await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 0,
@@ -766,34 +767,12 @@ async fn test_summarized_index_deletion() {
"###);
// is the details correctly set when documents are actually deleted.
// /!\ We need to wait for the document addition to be processed otherwise, if the test runs too slow,
// both tasks may get autobatched and the deleted documents count will be wrong.
let (ret, _code) =
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 2,
@@ -813,25 +792,22 @@ async fn test_summarized_index_deletion() {
"###);
// What happens when you delete an index that doesn't exists.
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 3,
"uid": 2,
"indexUid": "test",
"status": "failed",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 0
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
"deletedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",

View File

@@ -119,7 +119,12 @@ async fn add_remove_user_provided() {
{
"id": 1,
"name": "echo",
"_vectors": {}
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
}
],
"offset": 0,
@@ -141,7 +146,12 @@ async fn add_remove_user_provided() {
{
"id": 1,
"name": "echo",
"_vectors": {}
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
}
],
"offset": 0,
@@ -577,7 +587,12 @@ async fn add_remove_one_vector_4588() {
{
"id": 0,
"name": "kefir",
"_vectors": {}
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
}
],
"offset": 0,

View File

@@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
clap = { version = "4.5.9", features = ["derive"] }
anyhow = "1.0.79"
clap = { version = "4.4.17", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }
time = { version = "0.3.31", features = ["formatting"] }
uuid = { version = "1.6.1", features = ["v4"], default-features = false }

View File

@@ -260,7 +260,6 @@ fn export_a_dump(
// 4. Dump the indexes
let mut count = 0;
let mut buffer = Vec::new();
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());
@@ -269,7 +268,6 @@ fn export_a_dump(
})?;
let rtxn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
@@ -282,11 +280,8 @@ fn export_a_dump(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
for ret in index.all_compressed_documents(&rtxn)? {
let (_id, compressed_doc) = ret?;
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}

View File

@@ -1,6 +1,6 @@
[package]
name = "milli"
edition = "2021"
edition = "2018"
publish = false
version.workspace = true
@@ -14,81 +14,81 @@ license.workspace = true
[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.12", default-features = false }
charabia = { version = "0.8.11", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
either = { version = "1.9.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
grenad = { version = "0.4.6", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.20.3", default-features = false, features = [
heed = { version = "0.20.1", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.2.6", features = ["serde"] }
indexmap = { version = "2.1.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
zstd = { version = "0.13.1", features = ["zdict_builder", "experimental"] }
memmap2 = "0.9.4"
obkv = "0.2.2"
memmap2 = "0.7.1"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
ordered-float = "4.2.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.8.0"
roaring = { version = "0.10.2", features = ["serde"] }
rstar = { version = "0.11.0", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smallvec = "1.12.0"
smartstring = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.10.0", features = ["v4"] }
uuid = { version = "1.6.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.13.0"
itertools = "0.11.0"
csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
candle-nn = { version = "0.4.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.4.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.0"
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
mimalloc = { version = "0.1.39", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
insta = "1.34.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
@@ -141,3 +141,6 @@ swedish-recomposition = ["charabia/swedish-recomposition"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }

View File

@@ -30,7 +30,6 @@ fn main() -> Result<(), Box<dyn Error>> {
let index = Index::new(options, dataset)?;
let txn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&txn).unwrap();
let mut query = String::new();
while stdin().read_line(&mut query)? > 0 {
for _ in 0..2 {
@@ -50,7 +49,6 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let mut ctx = SearchContext::new(&index, &txn)?;
let mut buffer = Vec::new();
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
let docs = execute_search(
@@ -77,14 +75,11 @@ fn main() -> Result<(), Box<dyn Error>> {
let elapsed = start.elapsed();
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
if print_documents {
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, compressed_obkv)| {
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
.map(|(id, obkv)| {
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
@@ -95,20 +90,17 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();
for (id, document) in compressed_documents {
for (id, document) in documents {
println!("{id}:");
println!("{document}");
}
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, compressed_obkv)| {
.map(|(id, obkv)| {
let mut object = serde_json::Map::default();
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
@@ -118,7 +110,7 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
for (id, document) in compressed_documents {
for (id, document) in documents {
println!("{id}:");
println!("{document}");
}

View File

@@ -95,7 +95,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_enriched_document(
&mut self,
) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> {
) -> Result<Option<EnrichedDocument>, DocumentsBatchCursorError> {
let document = self.documents.next_document()?;
let document_id = match self.external_ids.move_on_next()? {
Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,

View File

@@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@@ -64,7 +64,7 @@ impl DocumentsBatchIndex {
self.0.len()
}
pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> {
pub fn iter(&self) -> bimap::hash::Iter<FieldId, String> {
self.0.iter()
}
@@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {

View File

@@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
@@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
Some(s) => Ok(s.to_string()),
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
},
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
content => Err(UserError::InvalidDocumentId { document_id: content }),
}
}

View File

@@ -76,7 +76,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
@@ -108,7 +108,7 @@ impl From<serde_json::Error> for DocumentsBatchCursorError {
impl error::Error for DocumentsBatchCursorError {}
impl fmt::Display for DocumentsBatchCursorError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),

View File

@@ -56,7 +56,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
Ok(Ok(()))
}
fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a documents, or a sequence of documents.")
}
}

View File

@@ -24,21 +24,17 @@ impl ExternalDocumentsIds {
}
/// Returns `true` if hard and soft external documents lists are empty.
pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> {
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
self.0.is_empty(rtxn).map_err(Into::into)
}
pub fn get<A: AsRef<str>>(
&self,
rtxn: &RoTxn<'_>,
external_id: A,
) -> heed::Result<Option<u32>> {
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
self.0.get(rtxn, external_id.as_ref())
}
/// An helper function to debug this type, returns an `HashMap` of both,
/// soft and hard fst maps, combined.
pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> {
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
let mut map = HashMap::default();
for result in self.0.iter(rtxn)? {
let (external, internal) = result?;
@@ -55,11 +51,7 @@ impl ExternalDocumentsIds {
///
/// - If attempting to delete a document that doesn't exist
/// - If attempting to create a document that already exists
pub fn apply(
&self,
wtxn: &mut RwTxn<'_>,
operations: Vec<DocumentOperation>,
) -> heed::Result<()> {
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
for DocumentOperation { external_id, internal_id, kind } in operations {
match kind {
DocumentOperationKind::Create => {
@@ -77,7 +69,7 @@ impl ExternalDocumentsIds {
}
/// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> {
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
self.0.iter(rtxn)
}
}

View File

@@ -11,7 +11,7 @@ pub enum FacetType {
}
impl fmt::Display for FacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FacetType::String => f.write_str("string"),
FacetType::Number => f.write_str("number"),
@@ -37,7 +37,7 @@ impl FromStr for FacetType {
pub struct InvalidFacetType;
impl fmt::Display for InvalidFacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(r#"Invalid facet type, must be "string" or "number""#)
}
}

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());

View File

@@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());

View File

@@ -1,89 +0,0 @@
use std::borrow::Cow;
use std::io;
use std::io::ErrorKind;
use heed::BoxedError;
use obkv::KvReaderU16;
use zstd::bulk::{Compressor, Decompressor};
use zstd::dict::{DecoderDictionary, EncoderDictionary};
pub struct CompressedObkvCodec;
impl<'a> heed::BytesDecode<'a> for CompressedObkvCodec {
type DItem = CompressedKvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(CompressedKvReaderU16(bytes))
}
}
impl heed::BytesEncode<'_> for CompressedObkvCodec {
type EItem = CompressedKvWriterU16;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(&item.0))
}
}
pub struct CompressedKvReaderU16<'a>(&'a [u8]);
impl<'a> CompressedKvReaderU16<'a> {
/// Decompresses the KvReader into the buffer using the provided dictionnary.
pub fn decompress_with<'b>(
&self,
buffer: &'b mut Vec<u8>,
dictionary: &DecoderDictionary,
) -> io::Result<KvReaderU16<'b>> {
const TWO_GIGABYTES: usize = 2 * 1024 * 1024 * 1024;
let mut decompressor = Decompressor::with_prepared_dictionary(dictionary)?;
let mut max_size = self.0.len() * 4;
let size = loop {
buffer.resize(max_size, 0);
match decompressor.decompress_to_buffer(self.0, &mut buffer[..max_size]) {
Ok(size) => break size,
// TODO don't do that !!! But what should I do?
Err(e) if e.kind() == ErrorKind::Other && max_size <= TWO_GIGABYTES => {
max_size *= 2
}
Err(e) => return Err(e),
}
};
Ok(KvReaderU16::new(&buffer[..size]))
}
/// Returns the KvReader like it is not compressed.
/// Happends when there is no dictionary yet.
pub fn as_non_compressed(&self) -> KvReaderU16<'a> {
KvReaderU16::new(self.0)
}
/// Decompresses this KvReader if necessary.
pub fn decompress_with_optional_dictionary<'b>(
&self,
buffer: &'b mut Vec<u8>,
dictionary: Option<&DecoderDictionary>,
) -> io::Result<KvReaderU16<'b>>
where
'a: 'b,
{
match dictionary {
Some(dict) => self.decompress_with(buffer, dict),
None => Ok(self.as_non_compressed()),
}
}
}
pub struct CompressedKvWriterU16(Vec<u8>);
impl CompressedKvWriterU16 {
// TODO ask for a KvReaderU16 here
pub fn new_with_dictionary(input: &[u8], dictionary: &EncoderDictionary) -> io::Result<Self> {
let mut compressor = Compressor::with_prepared_dictionary(dictionary)?;
compressor.compress(input).map(CompressedKvWriterU16)
}
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
}

View File

@@ -35,7 +35,7 @@ where
fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<'a, [u8]>, BoxedError> {
) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes

View File

@@ -24,7 +24,7 @@ impl<'a> BytesDecode<'a> for OrderedF64Codec {
impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16];
// write the globally ordered float

View File

@@ -21,7 +21,7 @@ impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count);

View File

@@ -1,7 +1,6 @@
mod beu16_str_codec;
mod beu32_str_codec;
mod byte_slice_ref;
mod compressed_obkv_codec;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
@@ -20,9 +19,6 @@ use thiserror::Error;
pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::compressed_obkv_codec::{
CompressedKvReaderU16, CompressedKvWriterU16, CompressedObkvCodec,
};
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;

View File

@@ -16,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for ObkvCodec {
impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
}
}

View File

@@ -42,7 +42,7 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
Ok(Cow::Owned(out))

View File

@@ -167,7 +167,7 @@ impl BytesDecodeOwned for CboRoaringBitmapCodec {
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Ok(Cow::Owned(vec))

View File

@@ -26,7 +26,7 @@ impl BytesDecodeOwned for RoaringBitmapCodec {
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes)?;
Ok(Cow::Owned(bytes))

View File

@@ -25,7 +25,7 @@ impl RoaringBitmapLenCodec {
}
};
if size > u16::MAX as usize + 1 {
if size > u16::max_value() as usize + 1 {
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
}

View File

@@ -25,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
type EItem = (Script, Language);
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let script_name = script.name().as_bytes();
let lan_name = lan.name().as_bytes();

View File

@@ -30,7 +30,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
type EItem = (&'a str, u32);
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len());
@@ -66,7 +66,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());

View File

@@ -24,7 +24,7 @@ impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes());
@@ -51,7 +51,7 @@ impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1);

Some files were not shown because too many files have changed in this diff Show More