Compare commits

..

1 Commits

Author SHA1 Message Date
42bfc67871 with binary quantization 2024-07-08 20:12:56 +02:00
185 changed files with 2189 additions and 10112 deletions

View File

@@ -1,5 +1,6 @@
 name: Look for flaky tests
+env:
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 on:
   workflow_dispatch:
   schedule:
@@ -9,8 +10,8 @@ jobs:
   flaky:
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies

View File

@@ -1,5 +1,6 @@
 name: Run the indexing fuzzer
+env:
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 on:
   push:
     branches:

View File

@@ -15,11 +15,13 @@ jobs:
   debian:
     name: Publish debian packagge
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     runs-on: ubuntu-latest
     needs: check-version
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27
+      image: ubuntu:18.04
     steps:
       - name: Install needed dependencies
        run: |

View File

@@ -35,10 +35,12 @@ jobs:
   publish-linux:
     name: Publish binary for Linux
     runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     needs: check-version
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27
+      image: ubuntu:18.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
@@ -125,10 +127,10 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-version
     env:
-      DEBIAN_FRONTEND: noninteractive
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27
+      image: ubuntu:18.04
     strategy:
       matrix:
         include:
@@ -162,9 +164,6 @@ jobs:
           echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
           echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
           echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
-      - name: Install a default toolchain that will be used to build cargo cross
-        run: |
-          rustup default stable
       - name: Cargo build
         uses: actions-rs/cargo@v1
         with:

View File

@@ -19,11 +19,13 @@ env:
 jobs:
   test-linux:
-    name: Tests on ubuntu-20.04
+    name: Tests on ubuntu-18.04
     runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
@@ -71,9 +73,11 @@ jobs:
   test-all-features:
     name: Tests almost all features
     runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3
@@ -91,9 +95,11 @@ jobs:
   test-disabled-tokenization:
     name: Test disabled tokenization
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     runs-on: ubuntu-latest
     container:
-      image: ubuntu:20.04
+      image: ubuntu:18.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3
@@ -115,10 +121,12 @@ jobs:
   # We run tests in debug also, to make sure that the debug_assertions are hit
   test-debug:
     name: Run tests in debug
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
@@ -159,7 +167,7 @@ jobs:
       - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: nightly-2024-07-09
+          toolchain: nightly-2024-06-25
           override: true
           components: rustfmt
       - name: Cache dependencies

View File

@@ -52,20 +52,6 @@ cargo test
 This command will be triggered to each PR as a requirement for merging it.
 
-#### Faster build
-
-You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
-It'll store some built artifacts in the directory of your choice.
-We recommend using the standard `$HOME/.cache/lindera` directory:
-```sh
-export LINDERA_CACHE=$HOME/.cache/lindera
-```
-
-Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.
-Setting this variable will prevent the Meilisearch binary from being rebuilt each time the directory that hosts the Meilisearch repository changes.
-Do not enable this environment variable for production builds (as it will break the `version` route, among other things).
-
 #### Snapshot-based tests
 
 We are using [insta](https://insta.rs) to perform snapshot-based testing.
@@ -77,7 +63,7 @@ Furthermore, we provide some macros on top of insta, notably a way to use snapsh
 To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshot are fully created locally:
 
-```sh
+```
 export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
 ```
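
For readers unfamiliar with insta, here is a minimal, hypothetical sketch of the kind of inline snapshot assertion the CONTRIBUTING section above refers to (the test name and value are illustrative and not taken from this diff):

```rust
// Hypothetical example of an inline insta snapshot test.
// `insta::assert_snapshot!` renders the value via `Display` and compares it
// against the inline snapshot written after `@`; `cargo insta review` is used
// to accept or update stored snapshots.
#[cfg(test)]
mod tests {
    #[test]
    fn greeting_snapshot() {
        let greeting = format!("hello, {}", "world");
        insta::assert_snapshot!(greeting, @"hello, world");
    }
}
```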

Cargo.lock (generated): 1665 changed lines. File diff suppressed because it is too large.

View File

@@ -1,7 +1,7 @@
 # Compile
-FROM rust:1.79.0-alpine3.20 AS compiler
+FROM rust:1.75.0-alpine3.18 AS compiler
 
-RUN apk add -q --no-cache build-base openssl-dev
+RUN apk add -q --update-cache --no-cache build-base openssl-dev
 
 WORKDIR /
@@ -20,12 +20,13 @@ RUN set -eux; \
     cargo build --release -p meilisearch -p meilitool
 
 # Run
-FROM alpine:3.20
+FROM alpine:3.16
 
 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV MEILI_SERVER_PROVIDER docker
 
-RUN apk add -q --no-cache libgcc tini curl
+RUN apk update --quiet \
+    && apk add -q --no-cache libgcc tini curl
 
 # add meilisearch and meilitool to the `/bin` so you can run it from anywhere
 # and it's easy to find.

View File

@@ -11,24 +11,24 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-anyhow = "1.0.86"
+anyhow = "1.0.79"
 csv = "1.3.0"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.43", default-features = false }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
+mimalloc = { version = "0.1.39", default-features = false }
+serde_json = { version = "1.0.111", features = ["preserve_order"] }
 
 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.6"
+roaring = "0.10.2"
 
 [build-dependencies]
-anyhow = "1.0.86"
-bytes = "1.6.0"
+anyhow = "1.0.79"
+bytes = "1.5.0"
 convert_case = "0.6.0"
-flate2 = "1.0.30"
-reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
+flate2 = "1.0.28"
+reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
 
 [features]
 default = ["milli/all-tokenizations"]

View File

@@ -1,5 +1,5 @@
 status = [
-    'Tests on ubuntu-20.04',
+    'Tests on ubuntu-18.04',
     'Tests on macos-12',
     'Tests on windows-2022',
     'Run Clippy',

View File

@@ -11,8 +11,8 @@ license.workspace = true
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-time = { version = "0.3.36", features = ["parsing"] }
+time = { version = "0.3.34", features = ["parsing"] }
 
 [build-dependencies]
-anyhow = "1.0.86"
-vergen-git2 = "1.0.0"
+anyhow = "1.0.80"
+vergen-git2 = "1.0.0-beta.2"

View File

@@ -5,13 +5,6 @@ fn main() {
 }
 
 fn emit_git_variables() -> anyhow::Result<()> {
-    println!("cargo::rerun-if-env-changed=MEILI_NO_VERGEN");
-
-    let has_vergen =
-        !matches!(std::env::var_os("MEILI_NO_VERGEN"), Some(x) if x != "false" && x != "0");
-
-    anyhow::ensure!(has_vergen, "disabled via `MEILI_NO_VERGEN`");
-
     // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
     // in the corresponding GitHub workflow (publish_docker.yml).
     // This is due to the Dockerfile building the binary outside of the git directory.

View File

@@ -11,21 +11,22 @@ readme.workspace = true
 license.workspace = true
 
 [dependencies]
-anyhow = "1.0.86"
-flate2 = "1.0.30"
-http = "1.1.0"
+anyhow = "1.0.79"
+flate2 = "1.0.28"
+http = "0.2.11"
+meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.19.0"
-regex = "1.10.5"
-roaring = { version = "0.10.6", features = ["serde"] }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
-tar = "0.4.41"
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+regex = "1.10.2"
+roaring = { version = "0.10.2", features = ["serde"] }
+serde = { version = "1.0.195", features = ["derive"] }
+serde_json = { version = "1.0.111", features = ["preserve_order"] }
+tar = "0.4.40"
+tempfile = "3.9.0"
+thiserror = "1.0.56"
+time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tracing = "0.1.40"
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+uuid = { version = "1.6.1", features = ["serde", "v4"] }
 
 [dev-dependencies]
 big_s = "1.0.2"

View File

@@ -104,11 +104,6 @@ pub enum KindDump {
     DocumentDeletionByFilter {
         filter: serde_json::Value,
     },
-    DocumentEdition {
-        filter: Option<serde_json::Value>,
-        context: Option<serde_json::Map<String, serde_json::Value>>,
-        function: String,
-    },
     Settings {
         settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
         is_deletion: bool,
@@ -177,9 +172,6 @@ impl From<KindWithContent> for KindDump {
             KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
                 KindDump::DocumentDeletionByFilter { filter: filter_expr }
             }
-            KindWithContent::DocumentEdition { filter_expr, context, function, .. } => {
-                KindDump::DocumentEdition { filter: filter_expr, context, function }
-            }
             KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
             KindWithContent::SettingsUpdate {
                 new_settings,

View File

@@ -425,7 +425,7 @@ pub(crate) mod test {
         let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -358,7 +358,7 @@ pub(crate) mod test {
         let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -394,8 +394,8 @@ pub(crate) mod test {
         let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -442,8 +442,8 @@ pub(crate) mod test {
         let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -216,7 +216,7 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
 
         // tasks
@@ -337,7 +337,7 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
 
         // tasks
@@ -383,8 +383,8 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -463,8 +463,8 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -540,7 +540,7 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
         assert_eq!(dump.instance_uid().unwrap(), None);
 
         // tasks
@@ -633,7 +633,7 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
         assert_eq!(dump.instance_uid().unwrap(), None);
 
         // tasks
@@ -726,7 +726,7 @@ pub(crate) mod test {
         let mut dump = DumpReader::open(dump).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
         assert_eq!(dump.instance_uid().unwrap(), None);
 
         // tasks

View File

@@ -252,7 +252,7 @@ pub(crate) mod test {
         let mut dump = V2Reader::open(dir).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -349,7 +349,7 @@ pub(crate) mod test {
         let mut dump = V2Reader::open(dir).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -267,7 +267,7 @@ pub(crate) mod test {
         let mut dump = V3Reader::open(dir).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -262,8 +262,8 @@ pub(crate) mod test {
         let mut dump = V4Reader::open(dir).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -299,8 +299,8 @@ pub(crate) mod test {
         let mut dump = V5Reader::open(dir).unwrap();
 
         // top level infos
-        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
 
         // tasks
         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -281,7 +281,7 @@ pub(crate) mod test {
         let dump_path = dump.path();
 
         // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
-        insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
+        insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
         .
         ├---- indexes/
         │ └---- doggos/

View File

@@ -11,7 +11,10 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-tempfile = "3.10.1"
-thiserror = "1.0.61"
+tempfile = "3.9.0"
+thiserror = "1.0.56"
 tracing = "0.1.40"
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+uuid = { version = "1.6.1", features = ["serde", "v4"] }
+
+[dev-dependencies]
+faux = "0.1.10"

View File

@@ -14,7 +14,7 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
 nom_locate = "4.2.0"
-unescaper = "0.1.5"
+unescaper = "0.1.3"
 
 [dev-dependencies]
-insta = "1.39.0"
+insta = "1.34.0"

View File

@@ -26,7 +26,6 @@ pub enum Condition<'a> {
     LowerThan(Token<'a>),
     LowerThanOrEqual(Token<'a>),
     Between { from: Token<'a>, to: Token<'a> },
-    Contains { keyword: Token<'a>, word: Token<'a> },
 }
 
 /// condition = value ("==" | ">" ...) value
@@ -93,34 +92,6 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
 }
 
-/// contains = value "CONTAINS" value
-pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
-    let (input, (fid, contains, value)) =
-        tuple((parse_value, tag("CONTAINS"), cut(parse_value)))(input)?;
-    Ok((
-        input,
-        FilterCondition::Condition {
-            fid,
-            op: Contains { keyword: Token { span: contains, value: None }, word: value },
-        },
-    ))
-}
-
-/// contains = value "NOT" WS+ "CONTAINS" value
-pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
-    let keyword = tuple((tag("NOT"), multispace1, tag("CONTAINS")));
-    let (input, (fid, (_not, _spaces, contains), value)) =
-        tuple((parse_value, keyword, cut(parse_value)))(input)?;
-    Ok((
-        input,
-        FilterCondition::Not(Box::new(FilterCondition::Condition {
-            fid,
-            op: Contains { keyword: Token { span: contains, value: None }, word: value },
-        })),
-    ))
-}
-
 /// to = value value "TO" WS+ value
 pub fn parse_to(input: Span) -> IResult<FilterCondition> {
     let (input, (key, from, _, _, to)) =
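
As an aside, the `parse_contains` shown above follows a common nom pattern: parse a value, match a keyword tag, then cut on the right-hand value. A minimal, self-contained sketch of that pattern on plain `&str` input (simplified types and a hypothetical function name, not the crate's own `Span`/`Token` machinery):

```rust
use nom::bytes::complete::tag;
use nom::character::complete::{alphanumeric1, multispace1};
use nom::sequence::tuple;
use nom::IResult;

// Hypothetical, simplified version of the `value KEYWORD value` shape used by
// `parse_contains`: returns the field name and the word it must contain.
fn parse_contains_demo(input: &str) -> IResult<&str, (&str, &str)> {
    let (rest, (field, _ws1, _kw, _ws2, word)) =
        tuple((alphanumeric1, multispace1, tag("CONTAINS"), multispace1, alphanumeric1))(input)?;
    Ok((rest, (field, word)))
}

// `parse_contains_demo("title CONTAINS foo")` yields `Ok(("", ("title", "foo")))`.
```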

View File

@@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
             }
             ErrorKind::InvalidPrimary => {
                 let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
             }
             ErrorKind::InvalidEscapedNumber => {
                 writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

View File

@@ -48,8 +48,8 @@ use std::fmt::Debug;
 pub use condition::{parse_condition, parse_to, Condition};
 use condition::{
-    parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
-    parse_is_null, parse_not_contains, parse_not_exists,
+    parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
+    parse_not_exists,
 };
 use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
 pub use error::{Error, ErrorKind};
@@ -147,37 +147,7 @@ pub enum FilterCondition<'a> {
     GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
 }
 
-pub enum TraversedElement<'a> {
-    FilterCondition(&'a FilterCondition<'a>),
-    Condition(&'a Condition<'a>),
-}
-
 impl<'a> FilterCondition<'a> {
-    pub fn use_contains_operator(&self) -> Option<&Token> {
-        match self {
-            FilterCondition::Condition { fid: _, op } => match op {
-                Condition::GreaterThan(_)
-                | Condition::GreaterThanOrEqual(_)
-                | Condition::Equal(_)
-                | Condition::NotEqual(_)
-                | Condition::Null
-                | Condition::Empty
-                | Condition::Exists
-                | Condition::LowerThan(_)
-                | Condition::LowerThanOrEqual(_)
-                | Condition::Between { .. } => None,
-                Condition::Contains { keyword, word: _ } => Some(keyword),
-            },
-            FilterCondition::Not(this) => this.use_contains_operator(),
-            FilterCondition::Or(seq) | FilterCondition::And(seq) => {
-                seq.iter().find_map(|filter| filter.use_contains_operator())
-            }
-            FilterCondition::GeoLowerThan { .. }
-            | FilterCondition::GeoBoundingBox { .. }
-            | FilterCondition::In { .. } => None,
-        }
-    }
-
     /// Returns the first token found at the specified depth, `None` if no token at this depth.
     pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
         match self {
@@ -482,8 +452,6 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
         parse_exists,
         parse_not_exists,
         parse_to,
-        parse_contains,
-        parse_not_contains,
         // the next lines are only for error handling and are written at the end to have the less possible performance impact
         parse_geo,
         parse_geo_distance,
@@ -566,7 +534,6 @@ impl<'a> std::fmt::Display for Condition<'a> {
             Condition::LowerThan(token) => write!(f, "< {token}"),
             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
            Condition::Between { from, to } => write!(f, "{from} TO {to}"),
-            Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
         }
     }
 }
@ -591,135 +558,127 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into() unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
} }
#[track_caller]
fn p(s: &str) -> impl std::fmt::Display + '_ { fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap() Fc::parse(s).unwrap().unwrap()
} }
#[test] #[test]
fn parse_escaped() { fn parse_escaped() {
insta::assert_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences // but it also works with other sequences
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}"); insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
} }
#[test] #[test]
fn parse() { fn parse() {
// Test equal // Test equal
insta::assert_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}"); insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}"); insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
insta::assert_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}"); insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}"); insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}"); insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
insta::assert_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}"); insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
insta::assert_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}"); insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
// Test IN // Test IN
insta::assert_snapshot!(p("colour IN[]"), @"{colour} IN[]"); insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
insta::assert_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]"); insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]"); insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
// Test IN + OR/AND/() // Test IN + OR/AND/()
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]"); insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
insta::assert_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]"); insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
// Test whitespace start/end // Test whitespace start/end
insta::assert_snapshot!(p(" colour = green "), @"{colour} = {green}"); insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
insta::assert_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]"); insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])"); insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]"); insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
// Test conditions // Test conditions
insta::assert_snapshot!(p("channel != ponce"), @"{channel} != {ponce}"); insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
insta::assert_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
insta::assert_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}"); insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
insta::assert_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}"); insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}"); insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
// Test NOT // Test NOT
insta::assert_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test NULL + NOT NULL // Test NULL + NOT NULL
insta::assert_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL"); insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)"); insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)"); insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL"); insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)"); insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
// Test EMPTY + NOT EMPTY // Test EMPTY + NOT EMPTY
insta::assert_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY"); insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)"); insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY"); insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
// Test EXISTS + NOT EXITS // Test EXISTS + NOT EXITS
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
// Test CONTAINS + NOT CONTAINS
insta::assert_snapshot!(p("subscribers CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("NOT subscribers CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
insta::assert_snapshot!(p("subscribers NOT CONTAINS hello"), @"NOT ({subscribers} CONTAINS {hello})");
insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
// Test nested NOT // Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
// Test geo radius // Test geo radius
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
// Test geo bounding box // Test geo bounding box
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))"); insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
// Test OR + AND // Test OR + AND
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]"); insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
insta::assert_snapshot!( insta::assert_display_snapshot!(
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"), p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]" @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
); );
// Test parentheses // Test parentheses
insta::assert_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
insta::assert_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
// Test recursion // Test recursion
// This is the most that is allowed // This is the most that is allowed
insta::assert_snapshot!( insta::assert_display_snapshot!(
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"), p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
@"{x} = {1}" @"{x} = {1}"
); );
insta::assert_snapshot!( insta::assert_display_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@"NOT ({x} = {1})" @"NOT ({x} = {1})"
); );
// Confusing keywords // Confusing keywords
insta::assert_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]"); insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
} }
#[test] #[test]
@ -730,182 +689,182 @@ pub mod tests {
Fc::parse(s).unwrap_err().to_string() Fc::parse(s).unwrap_err().to_string()
} }
insta::assert_snapshot!(p("channel = Ponce = 12"), @r###" insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule. Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
17:21 channel = Ponce = 12 17:21 channel = Ponce = 12
"###); "###);
insta::assert_snapshot!(p("channel = "), @r###" insta::assert_display_snapshot!(p("channel = "), @r###"
Was expecting a value but instead got nothing. Was expecting a value but instead got nothing.
14:14 channel = 14:14 channel =
"###); "###);
insta::assert_snapshot!(p("channel = 🐻"), @r###" insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
Was expecting a value but instead got `🐻`. Was expecting a value but instead got `🐻`.
11:12 channel = 🐻 11:12 channel = 🐻
"###); "###);
insta::assert_snapshot!(p("channel = 🐻 AND followers < 100"), @r###" insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
Was expecting a value but instead got `🐻`. Was expecting a value but instead got `🐻`.
11:12 channel = 🐻 AND followers < 100 11:12 channel = 🐻 AND followers < 100
"###); "###);
insta::assert_snapshot!(p("'OR'"), @r###" insta::assert_display_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR' 1:5 'OR'
"###); "###);
insta::assert_snapshot!(p("OR"), @r###" insta::assert_display_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR 1:3 OR
"###); "###);
insta::assert_snapshot!(p("channel Ponce"), @r###" insta::assert_display_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce 1:14 channel Ponce
"###); "###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###" insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR 19:19 channel = Ponce OR
"###); "###);
insta::assert_snapshot!(p("_geoRadius"), @r###" insta::assert_display_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius 1:11 _geoRadius
"###); "###);
insta::assert_snapshot!(p("_geoRadius = 12"), @r###" insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12 1:16 _geoRadius = 12
"###); "###);
insta::assert_snapshot!(p("_geoBoundingBox"), @r###" insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox 1:16 _geoBoundingBox
"###); "###);
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###" insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12 1:21 _geoBoundingBox = 12
"###); "###);
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###" insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0) 1:26 _geoBoundingBox(1.0, 1.0)
"###); "###);
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14) 1:22 _geoPoint(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14) 13:34 position <= _geoPoint(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("_geoDistance(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14) 1:25 _geoDistance(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14) 13:37 position <= _geoDistance(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("_geo(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14) 1:17 _geo(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("position <= _geo(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates. `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14) 13:29 position <= _geo(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value. The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14) 13:35 position <= _geoRadius(12, 13, 14)
"###); "###);
insta::assert_snapshot!(p("channel = 'ponce"), @r###" insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
Expression `\'ponce` is missing the following closing delimiter: `'`. Expression `\'ponce` is missing the following closing delimiter: `'`.
11:17 channel = 'ponce 11:17 channel = 'ponce
"###); "###);
insta::assert_snapshot!(p("channel = \"ponce"), @r###" insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
Expression `\"ponce` is missing the following closing delimiter: `"`. Expression `\"ponce` is missing the following closing delimiter: `"`.
11:17 channel = "ponce 11:17 channel = "ponce
"###); "###);
insta::assert_snapshot!(p("channel = mv OR (followers >= 1000"), @r###" insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
Expression `(followers >= 1000` is missing the following closing delimiter: `)`. Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
17:35 channel = mv OR (followers >= 1000 17:35 channel = mv OR (followers >= 1000
"###); "###);
insta::assert_snapshot!(p("channel = mv OR followers >= 1000)"), @r###" insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule. Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
34:35 channel = mv OR followers >= 1000) 34:35 channel = mv OR followers >= 1000)
"###); "###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###" insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST 1:17 colour NOT EXIST
"###); "###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000 1:23 subscribers 100 TO1000
"###); "###);
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###" insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule. Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
17:44 channel = ponce ORdog != 'bernese mountain' 17:44 channel = ponce ORdog != 'bernese mountain'
"###); "###);
insta::assert_snapshot!(p("colour IN blue, green]"), @r###" insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
Expected `[` after `IN` keyword. Expected `[` after `IN` keyword.
11:23 colour IN blue, green] 11:23 colour IN blue, green]
"###); "###);
insta::assert_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###" insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`. Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
32:36 colour IN [blue, green, 'blue' > 2] 32:36 colour IN [blue, green, 'blue' > 2]
"###); "###);
insta::assert_snapshot!(p("colour IN [blue, green, AND]"), @r###" insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`. Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
25:29 colour IN [blue, green, AND] 25:29 colour IN [blue, green, AND]
"###); "###);
insta::assert_snapshot!(p("colour IN [blue, green"), @r###" insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
Expected matching `]` after the list of field names given to `IN[` Expected matching `]` after the list of field names given to `IN[`
23:23 colour IN [blue, green 23:23 colour IN [blue, green
"###); "###);
insta::assert_snapshot!(p("colour IN ['blue, green"), @r###" insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
Expression `\'blue, green` is missing the following closing delimiter: `'`. Expression `\'blue, green` is missing the following closing delimiter: `'`.
12:24 colour IN ['blue, green 12:24 colour IN ['blue, green
"###); "###);
insta::assert_snapshot!(p("x = EXISTS"), @r###" insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes. Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
5:11 x = EXISTS 5:11 x = EXISTS
"###); "###);
insta::assert_snapshot!(p("AND = 8"), @r###" insta::assert_display_snapshot!(p("AND = 8"), @r###"
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes. Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
1:4 AND = 8 1:4 AND = 8
"###); "###);
insta::assert_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###" insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))) 51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
"###); "###);
insta::assert_snapshot!( insta::assert_display_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@r###" @r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
@ -913,41 +872,41 @@ pub mod tests {
"### "###
); );
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS 5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###); "###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###" insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL 1:11 value NULL
"###); "###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL 1:15 value NOT NULL
"###); "###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY 1:12 value EMPTY
"###); "###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY 1:16 value NOT EMPTY
"###); "###);
insta::assert_snapshot!(p(r#"value IS"#), @r###" insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS 1:9 value IS
"###); "###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT 1:13 value IS NOT
"###); "###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS 1:16 value IS EXISTS
"###); "###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS 1:20 value IS NOT EXISTS
"###); "###);
} }
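The error cases asserted above also document the accepted filter syntax. As a positive counterpart, here is a minimal sketch (not part of this diff) of the forms those messages recommend; it assumes it sits in the same crate's test module, where `FilterCondition` and its `parse` entry point are available.

    #[test]
    fn documented_forms_parse() {
        use crate::FilterCondition;

        // A non-empty, well-formed filter parses to `Ok(Some(_))`.
        let parses = |input: &str| matches!(FilterCondition::parse(input), Ok(Some(_)));

        // The geo filter shapes the error messages point to.
        assert!(parses("_geoRadius(12, 13, 14)"));
        assert!(parses("_geoBoundingBox([12, 13], [14, 15])"));

        // Reserved keywords become usable as field names or values once quoted.
        assert!(parses("\"AND\" = 8"));
    }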

View File

@ -211,7 +211,6 @@ fn is_keyword(s: &str) -> bool {
| "IS" | "IS"
| "NULL" | "NULL"
| "EMPTY" | "EMPTY"
| "CONTAINS"
| "_geoRadius" | "_geoRadius"
| "_geoBoundingBox" | "_geoBoundingBox"
) )
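The hunk above only shows the tail of the reserved-word list; on this branch `CONTAINS` joins it, which is why unquoted `CONTAINS` tokens are rejected as plain values. A hedged reconstruction of the helper's shape, keeping the earlier arms elided exactly as the hunk does:

    fn is_keyword(s: &str) -> bool {
        matches!(
            s,
            // ... earlier keyword arms elided, as in the hunk above ...
            "IS" | "NULL" | "EMPTY" | "CONTAINS" | "_geoRadius" | "_geoBoundingBox"
        )
    }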

View File

@ -12,9 +12,9 @@ license.workspace = true
[dependencies] [dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] } arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.5.9", features = ["derive"] } clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.1.0" fastrand = "2.0.1"
milli = { path = "../milli" } milli = { path = "../milli" }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.10.1" tempfile = "3.9.0"

View File

@ -11,38 +11,38 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.79"
bincode = "1.3.3" bincode = "1.3.3"
csv = "1.3.0" csv = "1.3.0"
derive_builder = "0.20.0" derive_builder = "0.12.0"
dump = { path = "../dump" } dump = { path = "../dump" }
enum-iterator = "2.1.0" enum-iterator = "1.5.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.28"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.6.0" page_size = "0.5.0"
rayon = "1.10.0" rayon = "1.8.1"
roaring = { version = "0.10.6", features = ["serde"] } roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.111", features = ["preserve_order"] }
synchronoise = "1.0.1" synchronoise = "1.0.1"
tempfile = "3.10.1" tempfile = "3.9.0"
thiserror = "1.0.61" thiserror = "1.0.56"
time = { version = "0.3.36", features = [ time = { version = "0.3.31", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tracing = "0.1.40" tracing = "0.1.40"
ureq = "2.10.0" ureq = "2.9.7"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.4.0" arroy = "0.4.0"
big_s = "1.0.2" big_s = "1.0.2"
crossbeam = "0.8.4" crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] } insta = { version = "1.34.0", features = ["json", "redactions"] }
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }

View File

@ -24,7 +24,6 @@ enum AutobatchKind {
allow_index_creation: bool, allow_index_creation: bool,
primary_key: Option<String>, primary_key: Option<String>,
}, },
DocumentEdition,
DocumentDeletion, DocumentDeletion,
DocumentDeletionByFilter, DocumentDeletionByFilter,
DocumentClear, DocumentClear,
@ -64,7 +63,6 @@ impl From<KindWithContent> for AutobatchKind {
primary_key, primary_key,
.. ..
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => { KindWithContent::DocumentDeletionByFilter { .. } => {
@ -100,9 +98,6 @@ pub enum BatchKind {
primary_key: Option<String>, primary_key: Option<String>,
operation_ids: Vec<TaskId>, operation_ids: Vec<TaskId>,
}, },
DocumentEdition {
id: TaskId,
},
DocumentDeletion { DocumentDeletion {
deletion_ids: Vec<TaskId>, deletion_ids: Vec<TaskId>,
}, },
@ -204,7 +199,6 @@ impl BatchKind {
}), }),
allow_index_creation, allow_index_creation,
), ),
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
K::DocumentDeletion => { K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
} }
@ -228,7 +222,7 @@ impl BatchKind {
match (self, kind) { match (self, kind) {
// We don't batch any of these operations // We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this), (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exist. // We must not batch tasks that don't have the same index creation rights if the index doesn't already exist.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this) Break(this)
@ -525,7 +519,6 @@ impl BatchKind {
| BatchKind::IndexDeletion { .. } | BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. } | BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. } | BatchKind::IndexSwap { .. }
| BatchKind::DocumentEdition { .. }
| BatchKind::DocumentDeletionByFilter { .. }, | BatchKind::DocumentDeletionByFilter { .. },
_, _,
) => { ) => {

View File

@ -34,7 +34,7 @@ use meilisearch_types::milli::update::{
use meilisearch_types::milli::vector::parsed_vectors::{ use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
}; };
use meilisearch_types::milli::{self, Filter, Object}; use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -106,10 +106,6 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>, operations: Vec<DocumentOperation>,
tasks: Vec<Task>, tasks: Vec<Task>,
}, },
DocumentEdition {
index_uid: String,
task: Task,
},
IndexDocumentDeletionByFilter { IndexDocumentDeletionByFilter {
index_uid: String, index_uid: String,
task: Task, task: Task,
@ -168,8 +164,7 @@ impl Batch {
| IndexOperation::DocumentClear { tasks, .. } => { | IndexOperation::DocumentClear { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
} }
IndexOperation::DocumentEdition { task, .. } IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
} }
IndexOperation::SettingsAndDocumentOperation { IndexOperation::SettingsAndDocumentOperation {
@ -233,7 +228,6 @@ impl IndexOperation {
pub fn index_uid(&self) -> &str { pub fn index_uid(&self) -> &str {
match self { match self {
IndexOperation::DocumentOperation { index_uid, .. } IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentEdition { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. } | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. } | IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. } | IndexOperation::Settings { index_uid, .. }
@ -249,9 +243,6 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentOperation { .. } => { IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation") f.write_str("IndexOperation::DocumentOperation")
} }
IndexOperation::DocumentEdition { .. } => {
f.write_str("IndexOperation::DocumentEdition")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => { IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter") f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
} }
@ -304,21 +295,6 @@ impl IndexScheduler {
_ => unreachable!(), _ => unreachable!(),
} }
} }
BatchKind::DocumentEdition { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentEdition { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentEdition {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => { BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?; let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks let primary_key = tasks
@ -1281,7 +1257,6 @@ impl IndexScheduler {
operations, operations,
mut tasks, mut tasks,
} => { } => {
let started_processing_at = std::time::Instant::now();
let mut primary_key_has_been_set = false; let mut primary_key_has_been_set = false;
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
let indexer_config = self.index_mapper.indexer_config(); let indexer_config = self.index_mapper.indexer_config();
@ -1396,7 +1371,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) { if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?; let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); tracing::info!(indexing_result = ?addition, "document indexing done");
} else if primary_key_has_been_set { } else if primary_key_has_been_set {
// Everything failed but we've set a primary key. // Everything failed but we've set a primary key.
// We need to remove it. // We need to remove it.
@ -1411,64 +1386,6 @@ impl IndexScheduler {
Ok(tasks) Ok(tasks)
} }
IndexOperation::DocumentEdition { mut task, .. } => {
let (filter, context, function) =
if let KindWithContent::DocumentEdition {
filter_expr, context, function, ..
} = &task.kind
{
(filter_expr, context, function)
} else {
unreachable!()
};
let result_count = edit_documents_by_function(
index_wtxn,
filter,
context.clone(),
function,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
original_filter,
context,
function,
..
}) = task.details
{
(original_filter, context, function)
} else {
// In the case of a `documentEdition` the details MUST be set
unreachable!();
};
match result_count {
Ok((deleted_documents, edited_documents)) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(deleted_documents),
edited_documents: Some(edited_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(0),
edited_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter = let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } = if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1757,44 +1674,3 @@ fn delete_document_by_filter<'a>(
0 0
}) })
} }
fn edit_documents_by_function<'a>(
wtxn: &mut RwTxn<'a>,
filter: &Option<serde_json::Value>,
context: Option<Object>,
code: &str,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<(u64, u64)> {
let candidates = match filter.as_ref().map(Filter::from_json) {
Some(Ok(Some(filter))) => filter.evaluate(wtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?,
None | Some(Ok(None)) => index.documents_ids(wtxn)?,
Some(Err(e)) => return Err(e.into()),
};
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.edit_documents(&candidates, context, code)?;
builder = new_builder;
let _ = builder.execute()?;
Ok(count.unwrap())
}
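For reference, a hedged sketch (not from this diff) of the payload that drives this path: the task kind removed from the type definitions further down carries the filter, the optional context, and the editing function as plain data. The concrete values below are illustrative only.

    use meilisearch_types::tasks::{Kind, KindWithContent};

    fn main() {
        // Hypothetical document-edition task: select comedies and rewrite their titles.
        let kind = KindWithContent::DocumentEdition {
            index_uid: "movies".to_string(),
            filter_expr: Some(serde_json::json!("genre = comedy")),
            context: None,
            function: "doc.title = doc.title.to_upper()".to_string(),
        };
        assert_eq!(kind.as_kind(), Kind::DocumentEdition);
    }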

View File

@ -68,32 +68,6 @@ impl RoFeatures {
.into()) .into())
} }
} }
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.edit_documents_by_function {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "edit documents by function",
issue_link: "https://github.com/orgs/meilisearch/discussions/762",
}
.into())
}
}
pub fn check_contains_filter(&self) -> Result<()> {
if self.runtime.contains_filter {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}
.into())
}
}
} }
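The two checks removed above are the gate that request handlers consult before accepting the corresponding experimental features. A hedged sketch of a hypothetical call site (the handler shape and messages are illustrative, not taken from this diff):

    // Hypothetical guard: fail fast with a FeatureNotEnabledError when a request
    // relies on a feature whose runtime flag is off.
    fn guard_experimental_features(features: RoFeatures) -> Result<()> {
        // Required before honouring a `CONTAINS` / `NOT CONTAINS` filter clause.
        features.check_contains_filter()?;
        // Required before accepting an edit-documents-by-function payload.
        features.check_edit_documents_by_function("editing documents by function")?;
        Ok(())
    }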
impl FeatureData { impl FeatureData {
@ -105,11 +79,9 @@ impl FeatureData {
let txn = env.read_txn()?; let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures = let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics, metrics: instance_features.metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route, logs_route: instance_features.logs_route || persisted_features.logs_route,
contains_filter: contains_filter || persisted_features.contains_filter,
..persisted_features ..persisted_features
})); }));

View File

@ -177,17 +177,6 @@ fn snapshot_details(d: &Details) -> String {
} => { } => {
format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}") format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
} }
Details::DocumentEdition {
deleted_documents,
edited_documents,
original_filter,
context,
function,
} => {
format!(
"{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}"
)
}
Details::SettingsUpdate { settings } => { Details::SettingsUpdate { settings } => {
format!("{{ settings: {settings:?} }}") format!("{{ settings: {settings:?} }}")
} }

View File

@ -662,11 +662,7 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name) self.index_mapper.index(&rtxn, name)
} }
/// Returns whether an index with the given name exists.
pub fn index_exists(&self, name: &str) -> Result<bool> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index_exists(&rtxn, name)
}
/// Return the name of all indexes without opening them. /// Return the name of all indexes without opening them.
pub fn index_names(&self) -> Result<Vec<String>> { pub fn index_names(&self) -> Result<Vec<String>> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -1607,14 +1603,6 @@ impl<'a> Dump<'a> {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
} }
} }
KindDump::DocumentEdition { filter, context, function } => {
KindWithContent::DocumentEdition {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
filter_expr: filter,
context,
function,
}
}
KindDump::DocumentClear => KindWithContent::DocumentClear { KindDump::DocumentClear => KindWithContent::DocumentClear {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
}, },
@ -3799,15 +3787,15 @@ mod tests {
]); ]);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks");
// The index should not exist. // The index should not exists.
snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
} }
#[test] #[test]
fn test_document_addition_cant_create_index_without_index_without_autobatching() { fn test_document_addition_cant_create_index_without_index_without_autobatching() {
// We're going to execute multiple document addition that don't have // We're going to execute multiple document addition that don't have
// the right to create an index while there is no index currently. // the right to create an index while there is no index currently.
// Since the auto-batching is disabled, every task should be processed // Since the autobatching is disabled, every tasks should be processed
// sequentially and throw an IndexDoesNotExists. // sequentially and throw an IndexDoesNotExists.
let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
@ -3849,8 +3837,8 @@ mod tests {
handle.advance_n_failed_batches(5); handle.advance_n_failed_batches(5);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
// The index should not exist. // The index should not exists.
snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
} }
#[test] #[test]
@ -4756,7 +4744,6 @@ mod tests {
"types": { "types": {
"documentAdditionOrUpdate": 0, "documentAdditionOrUpdate": 0,
"documentDeletion": 0, "documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0, "dumpCreation": 0,
"indexCreation": 3, "indexCreation": 3,
"indexDeletion": 0, "indexDeletion": 0,
@ -4788,7 +4775,6 @@ mod tests {
"types": { "types": {
"documentAdditionOrUpdate": 0, "documentAdditionOrUpdate": 0,
"documentDeletion": 0, "documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0, "dumpCreation": 0,
"indexCreation": 3, "indexCreation": 3,
"indexDeletion": 0, "indexDeletion": 0,
@ -4827,7 +4813,6 @@ mod tests {
"types": { "types": {
"documentAdditionOrUpdate": 0, "documentAdditionOrUpdate": 0,
"documentDeletion": 0, "documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0, "dumpCreation": 0,
"indexCreation": 3, "indexCreation": 3,
"indexDeletion": 0, "indexDeletion": 0,
@ -4867,7 +4852,6 @@ mod tests {
"types": { "types": {
"documentAdditionOrUpdate": 0, "documentAdditionOrUpdate": 0,
"documentDeletion": 0, "documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0, "dumpCreation": 0,
"indexCreation": 3, "indexCreation": 3,
"indexDeletion": 0, "indexDeletion": 0,

View File

@ -238,7 +238,6 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
let mut index_uids = vec![]; let mut index_uids = vec![];
match &mut task.kind { match &mut task.kind {
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid), K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid), K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid), K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid), K::DocumentClear { index_uid } => index_uids.push(index_uid),
@ -409,26 +408,7 @@ impl IndexScheduler {
match status { match status {
Status::Succeeded => assert!(indexed_documents <= received_documents), Status::Succeeded => assert!(indexed_documents <= received_documents),
Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0), Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0),
status => panic!("DocumentAddition can't have an indexed_documents set if it's {}", status), status => panic!("DocumentAddition can't have an indexed_document set if it's {}", status),
}
}
None => {
assert!(matches!(status, Status::Enqueued | Status::Processing))
}
}
}
Details::DocumentEdition { edited_documents, .. } => {
assert_eq!(kind.as_kind(), Kind::DocumentEdition);
match edited_documents {
Some(edited_documents) => {
assert!(matches!(
status,
Status::Succeeded | Status::Failed | Status::Canceled
));
match status {
Status::Succeeded => (),
Status::Failed | Status::Canceled => assert_eq!(edited_documents, 0),
status => panic!("DocumentEdition can't have an edited_documents set if it's {}", status),
} }
} }
None => { None => {

View File

@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
insta = { version = "^1.39.0", features = ["json", "redactions"] } insta = { version = "^1.34.0", features = ["json", "redactions"] }
md5 = "0.7.0" md5 = "0.7.0"
once_cell = "1.19" once_cell = "1.19"

View File

@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
base64 = "0.22.1" base64 = "0.21.7"
enum-iterator = "2.1.0" enum-iterator = "1.5.0"
hmac = "0.12.1" hmac = "0.12.1"
maplit = "1.0.2" maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5" rand = "0.8.5"
roaring = { version = "0.10.6", features = ["serde"] } roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.8"
thiserror = "1.0.61" thiserror = "1.0.56"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@ -11,36 +11,36 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.8.0", default-features = false } actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.86" anyhow = "1.0.79"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.3.0"
deserr = { version = "0.6.2", features = ["actix-web"] } deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] } either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "2.1.0" enum-iterator = "1.5.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.28"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.9.4" memmap2 = "0.7.1"
milli = { path = "../milli" } milli = { path = "../milli" }
roaring = { version = "0.10.6", features = ["serde"] } roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
serde_json = "1.0.120" serde_json = "1.0.111"
tar = "0.4.41" tar = "0.4.40"
tempfile = "3.10.1" tempfile = "3.9.0"
thiserror = "1.0.61" thiserror = "1.0.56"
time = { version = "0.3.36", features = [ time = { version = "0.3.31", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = "1.38" tokio = "1.35"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
insta = "1.39.0" insta = "1.34.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
[features] [features]

View File

@ -155,10 +155,6 @@ make_missing_field_convenience_builder!(
MissingFacetSearchFacetName, MissingFacetSearchFacetName,
missing_facet_search_facet_name missing_facet_search_facet_name
); );
make_missing_field_convenience_builder!(
MissingDocumentEditionFunction,
missing_document_edition_function
);
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using // Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self` // the default error code (C) from `Self`
@ -192,7 +188,6 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
merge_with_error_impl_take_error_message!(ParseTaskKindError); merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError); merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);

View File

@ -224,7 +224,6 @@ InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ; InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
@ -238,11 +237,6 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@ -342,10 +336,7 @@ UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA
// Experimental features // Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
NotFoundSimilarId , InvalidRequest , BAD_REQUEST ; NotFoundSimilarId , InvalidRequest , BAD_REQUEST
InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ;
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST
} }
impl ErrorCode for JoinError { impl ErrorCode for JoinError {
@ -415,15 +406,7 @@ impl ErrorCode for milli::Error {
Code::InvalidSettingsTypoTolerance Code::InvalidSettingsTypoTolerance
} }
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder, UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
UserError::VectorEmbeddingError(_) | UserError::DocumentEmbeddingError(_) => { UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
Code::VectorEmbeddingError
}
UserError::DocumentEditionCannotModifyPrimaryKey
| UserError::DocumentEditionDocumentMustBeObject
| UserError::DocumentEditionRuntimeError(_)
| UserError::DocumentEditionCompilationError(_) => {
Code::EditDocumentsByFunctionError
}
} }
} }
} }
@ -519,12 +502,6 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
} }
} }
impl fmt::Display for deserr_codes::InvalidMultiSearchWeight {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `weight` is invalid, expected a positive float (>= 0.0).")
}
}
impl fmt::Display for deserr_codes::InvalidSimilarId { impl fmt::Display for deserr_codes::InvalidSimilarId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!( write!(

View File

@ -6,13 +6,10 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool, pub vector_store: bool,
pub metrics: bool, pub metrics: bool,
pub logs_route: bool, pub logs_route: bool,
pub edit_documents_by_function: bool,
pub contains_filter: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures { pub struct InstanceTogglableFeatures {
pub metrics: bool, pub metrics: bool,
pub logs_route: bool, pub logs_route: bool,
pub contains_filter: bool,
} }

View File

@ -1,4 +1,3 @@
use milli::Object;
use serde::Serialize; use serde::Serialize;
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};
@ -55,8 +54,6 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>, pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub edited_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>, pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>, pub provided_ids: Option<usize>,
@ -73,10 +70,6 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>, pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub context: Option<Option<Object>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub function: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)] #[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>, pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@ -93,20 +86,6 @@ impl From<Details> for DetailsView {
..DetailsView::default() ..DetailsView::default()
} }
} }
Details::DocumentEdition {
deleted_documents,
edited_documents,
original_filter,
context,
function,
} => DetailsView {
deleted_documents: Some(deleted_documents),
edited_documents: Some(edited_documents),
original_filter: Some(original_filter),
context: Some(context),
function: Some(function),
..DetailsView::default()
},
Details::SettingsUpdate { mut settings } => { Details::SettingsUpdate { mut settings } => {
settings.hide_secrets(); settings.hide_secrets();
DetailsView { settings: Some(settings), ..DetailsView::default() } DetailsView { settings: Some(settings), ..DetailsView::default() }

View File

@ -5,7 +5,6 @@ use std::str::FromStr;
use enum_iterator::Sequence; use enum_iterator::Sequence;
use milli::update::IndexDocumentsMethod; use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize, Serializer}; use serde::{Deserialize, Serialize, Serializer};
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};
@ -49,7 +48,6 @@ impl Task {
| TaskDeletion { .. } | TaskDeletion { .. }
| IndexSwap { .. } => None, | IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. } DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
| DocumentDeletion { index_uid, .. } | DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. } | DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid } | DocumentClear { index_uid }
@ -69,8 +67,7 @@ impl Task {
pub fn content_uuid(&self) -> Option<Uuid> { pub fn content_uuid(&self) -> Option<Uuid> {
match self.kind { match self.kind {
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file), KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
KindWithContent::DocumentEdition { .. } KindWithContent::DocumentDeletion { .. }
| KindWithContent::DocumentDeletion { .. }
| KindWithContent::DocumentDeletionByFilter { .. } | KindWithContent::DocumentDeletionByFilter { .. }
| KindWithContent::DocumentClear { .. } | KindWithContent::DocumentClear { .. }
| KindWithContent::SettingsUpdate { .. } | KindWithContent::SettingsUpdate { .. }
@ -105,12 +102,6 @@ pub enum KindWithContent {
index_uid: String, index_uid: String,
filter_expr: serde_json::Value, filter_expr: serde_json::Value,
}, },
DocumentEdition {
index_uid: String,
filter_expr: Option<serde_json::Value>,
context: Option<milli::Object>,
function: String,
},
DocumentClear { DocumentClear {
index_uid: String, index_uid: String,
}, },
@ -159,7 +150,6 @@ impl KindWithContent {
pub fn as_kind(&self) -> Kind { pub fn as_kind(&self) -> Kind {
match self { match self {
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate, KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
KindWithContent::DocumentEdition { .. } => Kind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion, KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion, KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion, KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
@ -184,7 +174,6 @@ impl KindWithContent {
| TaskCancelation { .. } | TaskCancelation { .. }
| TaskDeletion { .. } => vec![], | TaskDeletion { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. } DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
| DocumentDeletion { index_uid, .. } | DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. } | DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid } | DocumentClear { index_uid }
@ -213,15 +202,6 @@ impl KindWithContent {
indexed_documents: None, indexed_documents: None,
}) })
} }
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
Some(Details::DocumentEdition {
deleted_documents: None,
edited_documents: None,
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
context: context.clone(),
function: function.clone(),
})
}
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
Some(Details::DocumentDeletion { Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(), provided_ids: documents_ids.len(),
@ -270,15 +250,6 @@ impl KindWithContent {
indexed_documents: Some(0), indexed_documents: Some(0),
}) })
} }
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
Some(Details::DocumentEdition {
deleted_documents: Some(0),
edited_documents: Some(0),
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
context: context.clone(),
function: function.clone(),
})
}
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
Some(Details::DocumentDeletion { Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(), provided_ids: documents_ids.len(),
@ -330,7 +301,6 @@ impl From<&KindWithContent> for Option<Details> {
indexed_documents: None, indexed_documents: None,
}) })
} }
KindWithContent::DocumentEdition { .. } => None,
KindWithContent::DocumentDeletion { .. } => None, KindWithContent::DocumentDeletion { .. } => None,
KindWithContent::DocumentDeletionByFilter { .. } => None, KindWithContent::DocumentDeletionByFilter { .. } => None,
KindWithContent::DocumentClear { .. } => None, KindWithContent::DocumentClear { .. } => None,
@ -424,7 +394,6 @@ impl std::error::Error for ParseTaskStatusError {}
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub enum Kind { pub enum Kind {
DocumentAdditionOrUpdate, DocumentAdditionOrUpdate,
DocumentEdition,
DocumentDeletion, DocumentDeletion,
SettingsUpdate, SettingsUpdate,
IndexCreation, IndexCreation,
@ -441,7 +410,6 @@ impl Kind {
pub fn related_to_one_index(&self) -> bool { pub fn related_to_one_index(&self) -> bool {
match self { match self {
Kind::DocumentAdditionOrUpdate Kind::DocumentAdditionOrUpdate
| Kind::DocumentEdition
| Kind::DocumentDeletion | Kind::DocumentDeletion
| Kind::SettingsUpdate | Kind::SettingsUpdate
| Kind::IndexCreation | Kind::IndexCreation
@ -459,7 +427,6 @@ impl Display for Kind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"), Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
Kind::DocumentEdition => write!(f, "documentEdition"),
Kind::DocumentDeletion => write!(f, "documentDeletion"), Kind::DocumentDeletion => write!(f, "documentDeletion"),
Kind::SettingsUpdate => write!(f, "settingsUpdate"), Kind::SettingsUpdate => write!(f, "settingsUpdate"),
Kind::IndexCreation => write!(f, "indexCreation"), Kind::IndexCreation => write!(f, "indexCreation"),
@ -487,8 +454,6 @@ impl FromStr for Kind {
Ok(Kind::IndexDeletion) Ok(Kind::IndexDeletion)
} else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") { } else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
Ok(Kind::DocumentAdditionOrUpdate) Ok(Kind::DocumentAdditionOrUpdate)
} else if kind.eq_ignore_ascii_case("documentEdition") {
Ok(Kind::DocumentEdition)
} else if kind.eq_ignore_ascii_case("documentDeletion") { } else if kind.eq_ignore_ascii_case("documentDeletion") {
Ok(Kind::DocumentDeletion) Ok(Kind::DocumentDeletion)
} else if kind.eq_ignore_ascii_case("settingsUpdate") { } else if kind.eq_ignore_ascii_case("settingsUpdate") {
@ -530,50 +495,16 @@ impl std::error::Error for ParseTaskKindError {}
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum Details { pub enum Details {
DocumentAdditionOrUpdate { DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option<u64> },
received_documents: u64, SettingsUpdate { settings: Box<Settings<Unchecked>> },
indexed_documents: Option<u64>, IndexInfo { primary_key: Option<String> },
}, DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
SettingsUpdate { DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
settings: Box<Settings<Unchecked>>, ClearAll { deleted_documents: Option<u64> },
}, TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
IndexInfo { TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
primary_key: Option<String>, Dump { dump_uid: Option<String> },
}, IndexSwap { swaps: Vec<IndexSwap> },
DocumentDeletion {
provided_ids: usize,
deleted_documents: Option<u64>,
},
DocumentDeletionByFilter {
original_filter: String,
deleted_documents: Option<u64>,
},
DocumentEdition {
deleted_documents: Option<u64>,
edited_documents: Option<u64>,
original_filter: Option<String>,
context: Option<Object>,
function: String,
},
ClearAll {
deleted_documents: Option<u64>,
},
TaskCancelation {
matched_tasks: u64,
canceled_tasks: Option<u64>,
original_filter: String,
},
TaskDeletion {
matched_tasks: u64,
deleted_tasks: Option<u64>,
original_filter: String,
},
Dump {
dump_uid: Option<String>,
},
IndexSwap {
swaps: Vec<IndexSwap>,
},
} }
impl Details { impl Details {
@ -583,7 +514,6 @@ impl Details {
Self::DocumentAdditionOrUpdate { indexed_documents, .. } => { Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
*indexed_documents = Some(0) *indexed_documents = Some(0)
} }
Self::DocumentEdition { edited_documents, .. } => *edited_documents = Some(0),
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0), Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
Self::DocumentDeletionByFilter { deleted_documents, .. } => { Self::DocumentDeletionByFilter { deleted_documents, .. } => {
*deleted_documents = Some(0) *deleted_documents = Some(0)

@ -14,101 +14,104 @@ default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.7.0"
actix-http = { version = "3.8.0", default-features = false, features = [ actix-http = { version = "3.7.0", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls-0_23", "rustls-0_21",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.8.0", default-features = false, features = [ actix-web = { version = "4.6.0", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"cookies", "cookies",
"rustls-0_23", "rustls-0_21",
] } ] }
anyhow = { version = "1.0.86", features = ["backtrace"] } actix-web-static-files = { version = "4.0.1", optional = true }
async-trait = "0.1.81" anyhow = { version = "1.0.79", features = ["backtrace"] }
bstr = "1.9.1" async-stream = "0.3.5"
byte-unit = { version = "5.1.4", default-features = false, features = [ async-trait = "0.1.77"
bstr = "1.9.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std", "std",
"byte",
"serde", "serde",
] } ] }
bytes = "1.6.0" bytes = "1.5.0"
clap = { version = "4.5.9", features = ["derive", "env"] } clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.11"
deserr = { version = "0.6.2", features = ["actix-web"] } deserr = { version = "0.6.1", features = ["actix-web"] }
dump = { path = "../dump" } dump = { path = "../dump" }
either = "1.13.0" either = "1.9.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.28"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3.30" futures = "0.3.30"
futures-util = "0.3.30" futures-util = "0.3.30"
http = "0.2.11"
index-scheduler = { path = "../index-scheduler" } index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.2.6", features = ["serde"] } indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.12" is-terminal = "0.4.10"
itertools = "0.13.0" itertools = "0.11.0"
jsonwebtoken = "9.3.0" jsonwebtoken = "9.2.0"
lazy_static = "1.5.0" lazy_static = "1.4.0"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.39", default-features = false }
mime = "0.3.17" mime = "0.3.17"
num_cpus = "1.16.0" num_cpus = "1.16.0"
obkv = "0.2.2" obkv = "0.2.1"
once_cell = "1.19.0" once_cell = "1.19.0"
ordered-float = "4.2.1" ordered-float = "4.2.0"
parking_lot = "0.12.3" parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" } permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.14" pin-project-lite = "0.2.13"
platform-dirs = "0.3.0" platform-dirs = "0.3.0"
prometheus = { version = "0.13.4", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
rand = "0.8.5" rand = "0.8.5"
rayon = "1.10.0" rayon = "1.8.0"
regex = "1.10.5" regex = "1.10.2"
reqwest = { version = "0.12.5", features = [ reqwest = { version = "0.11.23", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = { version = "0.23.11", features = ["ring"], default-features = false } rustls = "0.21.12"
rustls-pki-types = { version = "1.7.0", features = ["alloc"] } rustls-pemfile = "1.0.2"
rustls-pemfile = "2.1.2" segment = { version = "0.2.3", optional = true }
segment = { version = "0.2.4", optional = true } serde = { version = "1.0.195", features = ["derive"] }
serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.111", features = ["preserve_order"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.8"
siphasher = "1.0.1" siphasher = "1.0.0"
slice-group-by = "0.3.1" slice-group-by = "0.3.1"
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.13" sysinfo = "0.30.5"
tar = "0.4.41" tar = "0.4.40"
tempfile = "3.10.1" tempfile = "3.9.0"
thiserror = "1.0.61" thiserror = "1.0.56"
time = { version = "0.3.36", features = [ time = { version = "0.3.31", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = { version = "1.38.0", features = ["full"] } tokio = { version = "1.35.1", features = ["full"] }
toml = "0.8.14" tokio-stream = "0.1.14"
uuid = { version = "1.10.0", features = ["serde", "v4"] } toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.4.1"
url = { version = "2.5.2", features = ["serde"] } url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40" tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11" tracing-actix-web = "0.7.10"
build-info = { version = "1.7.0", path = "../build-info" } build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.2"
[dev-dependencies] [dev-dependencies]
actix-rt = "2.10.0" actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
brotli = "6.0.0" brotli = "6.0.0"
insta = "1.39.0" insta = "1.34.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.18"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
@ -117,22 +120,23 @@ urlencoding = "2.1.3"
yaup = "0.3.1" yaup = "0.3.1"
[build-dependencies] [build-dependencies]
anyhow = { version = "1.0.86", optional = true } anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.20.3", optional = true } cargo_toml = { version = "0.18.0", optional = true }
hex = { version = "0.4.3", optional = true } hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.5", features = [ reqwest = { version = "0.11.23", features = [
"blocking", "blocking",
"rustls-tls", "rustls-tls",
], default-features = false, optional = true } ], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.10.1", optional = true } tempfile = { version = "3.9.0", optional = true }
zip = { version = "2.1.3", optional = true } zip = { version = "0.6.6", optional = true }
[features] [features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"] analytics = ["segment"]
mini-dashboard = [ mini-dashboard = [
"actix-web-static-files",
"static-files", "static-files",
"anyhow", "anyhow",
"cargo_toml", "cargo_toml",

@ -6,7 +6,7 @@ use meilisearch_types::InstanceUid;
use serde_json::Value; use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::Opt; use crate::Opt;
pub struct MockAnalytics { pub struct MockAnalytics {
@ -42,7 +42,7 @@ pub struct MultiSearchAggregator;
#[allow(dead_code)] #[allow(dead_code)]
impl MultiSearchAggregator { impl MultiSearchAggregator {
pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
Self Self
} }
@ -97,13 +97,6 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest, _request: &HttpRequest,
) { ) {
} }
fn update_documents_by_function(
&self,
_documents_query: &DocumentEditionByFunction,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
} }

@ -13,7 +13,7 @@ use once_cell::sync::Lazy;
use platform_dirs::AppDirs; use platform_dirs::AppDirs;
use serde_json::Value; use serde_json::Value;
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; use crate::routes::indexes::documents::UpdateDocumentsQuery;
// if the analytics feature is disabled // if the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics // the `SegmentAnalytics` point to the mock instead of the real analytics
@ -102,7 +102,7 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate post facet values searches /// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator); fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate an add documents request // this method should be called to aggregate a add documents request
fn add_documents( fn add_documents(
&self, &self,
documents_query: &UpdateDocumentsQuery, documents_query: &UpdateDocumentsQuery,
@ -119,19 +119,11 @@ pub trait Analytics: Sync + Send {
// this method should be called to aggregate a add documents request // this method should be called to aggregate a add documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
// this method should be called to batch an update documents request // this method should be called to batch a update documents request
fn update_documents( fn update_documents(
&self, &self,
documents_query: &UpdateDocumentsQuery, documents_query: &UpdateDocumentsQuery,
index_creation: bool, index_creation: bool,
request: &HttpRequest, request: &HttpRequest,
); );
// this method should be called to batch an update documents by function request
fn update_documents_by_function(
&self,
documents_query: &DocumentEditionByFunction,
index_creation: bool,
request: &HttpRequest,
);
} }

@ -5,9 +5,10 @@ use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT}; use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest; use actix_web::HttpRequest;
use byte_unit::Byte; use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::InstanceUid; use meilisearch_types::InstanceUid;
@ -30,12 +31,12 @@ use crate::analytics::Analytics;
use crate::option::{ use crate::option::{
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
}; };
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::{create_all_stats, Stats}; use crate::routes::{create_all_stats, Stats};
use crate::search::{ use crate::search::{
FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex, FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO, DEFAULT_SEMANTIC_RATIO,
}; };
@ -80,7 +81,6 @@ pub enum AnalyticsMsg {
AggregateAddDocuments(DocumentsAggregator), AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator), AggregateUpdateDocuments(DocumentsAggregator),
AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator),
AggregateGetFetchDocuments(DocumentsFetchAggregator), AggregateGetFetchDocuments(DocumentsFetchAggregator),
AggregatePostFetchDocuments(DocumentsFetchAggregator), AggregatePostFetchDocuments(DocumentsFetchAggregator),
} }
@ -150,7 +150,6 @@ impl SegmentAnalytics {
add_documents_aggregator: DocumentsAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(),
update_documents_aggregator: DocumentsAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(),
edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_similar_aggregator: SimilarAggregator::default(), get_similar_aggregator: SimilarAggregator::default(),
@ -231,17 +230,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
} }
fn update_documents_by_function(
&self,
documents_query: &DocumentEditionByFunction,
index_creation: bool,
request: &HttpRequest,
) {
let aggregate =
EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate));
}
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
@ -261,7 +249,6 @@ impl super::Analytics for SegmentAnalytics {
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
struct Infos { struct Infos {
env: String, env: String,
experimental_contains_filter: bool,
experimental_enable_metrics: bool, experimental_enable_metrics: bool,
experimental_search_queue_size: usize, experimental_search_queue_size: usize,
experimental_logs_mode: LogMode, experimental_logs_mode: LogMode,
@ -304,7 +291,6 @@ impl From<Opt> for Infos {
// Thus we must not insert `..` at the end. // Thus we must not insert `..` at the end.
let Opt { let Opt {
db_path, db_path,
experimental_contains_filter,
experimental_enable_metrics, experimental_enable_metrics,
experimental_search_queue_size, experimental_search_queue_size,
experimental_logs_mode, experimental_logs_mode,
@ -355,7 +341,6 @@ impl From<Opt> for Infos {
// We consider information sensible if it contains a path, an address, or a key. // We consider information sensible if it contains a path, an address, or a key.
Self { Self {
env, env,
experimental_contains_filter,
experimental_enable_metrics, experimental_enable_metrics,
experimental_search_queue_size, experimental_search_queue_size,
experimental_logs_mode, experimental_logs_mode,
@ -405,7 +390,6 @@ pub struct Segment {
add_documents_aggregator: DocumentsAggregator, add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator, delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator, update_documents_aggregator: DocumentsAggregator,
edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator, get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator, post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_similar_aggregator: SimilarAggregator, get_similar_aggregator: SimilarAggregator,
@ -470,7 +454,6 @@ impl Segment {
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
@ -526,7 +509,6 @@ impl Segment {
add_documents_aggregator, add_documents_aggregator,
delete_documents_aggregator, delete_documents_aggregator,
update_documents_aggregator, update_documents_aggregator,
edit_documents_by_function_aggregator,
get_fetch_documents_aggregator, get_fetch_documents_aggregator,
post_fetch_documents_aggregator, post_fetch_documents_aggregator,
get_similar_aggregator, get_similar_aggregator,
@ -568,11 +550,6 @@ impl Segment {
{ {
let _ = self.batcher.push(update_documents).await; let _ = self.batcher.push(update_documents).await;
} }
if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator)
.into_event(user, "Documents Edited By Function")
{
let _ = self.batcher.push(edit_documents_by_function).await;
}
if let Some(get_fetch_documents) = if let Some(get_fetch_documents) =
take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET") take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET")
{ {
@ -1098,33 +1075,22 @@ pub struct MultiSearchAggregator {
show_ranking_score: bool, show_ranking_score: bool,
show_ranking_score_details: bool, show_ranking_score_details: bool,
// federation
use_federation: bool,
// context // context
user_agents: HashSet<String>, user_agents: HashSet<String>,
} }
impl MultiSearchAggregator { impl MultiSearchAggregator {
pub fn from_federated_search( pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
federated_search: &FederatedSearch,
request: &HttpRequest,
) -> Self {
let timestamp = Some(OffsetDateTime::now_utc()); let timestamp = Some(OffsetDateTime::now_utc());
let user_agents = extract_user_agents(request).into_iter().collect(); let user_agents = extract_user_agents(request).into_iter().collect();
let use_federation = federated_search.federation.is_some(); let distinct_indexes: HashSet<_> = query
let distinct_indexes: HashSet<_> = federated_search
.queries
.iter() .iter()
.map(|query| { .map(|query| {
let query = &query;
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
let SearchQueryWithIndex { let SearchQueryWithIndex {
index_uid, index_uid,
federation_options: _,
q: _, q: _,
vector: _, vector: _,
offset: _, offset: _,
@ -1156,10 +1122,8 @@ impl MultiSearchAggregator {
}) })
.collect(); .collect();
let show_ranking_score = let show_ranking_score = query.iter().any(|query| query.show_ranking_score);
federated_search.queries.iter().any(|query| query.show_ranking_score); let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details);
let show_ranking_score_details =
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
Self { Self {
timestamp, timestamp,
@ -1167,11 +1131,10 @@ impl MultiSearchAggregator {
total_succeeded: 0, total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(), total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: federated_search.queries.len(), total_search_count: query.len(),
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
user_agents, user_agents,
use_federation,
} }
} }
@ -1197,7 +1160,6 @@ impl MultiSearchAggregator {
let show_ranking_score_details = let show_ranking_score_details =
this.show_ranking_score_details || other.show_ranking_score_details; this.show_ranking_score_details || other.show_ranking_score_details;
let mut user_agents = this.user_agents; let mut user_agents = this.user_agents;
let use_federation = this.use_federation || other.use_federation;
for user_agent in other.user_agents.into_iter() { for user_agent in other.user_agents.into_iter() {
user_agents.insert(user_agent); user_agents.insert(user_agent);
@ -1214,7 +1176,6 @@ impl MultiSearchAggregator {
user_agents, user_agents,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
use_federation,
// do not add _ or ..Default::default() here // do not add _ or ..Default::default() here
}; };
@ -1233,7 +1194,6 @@ impl MultiSearchAggregator {
user_agents, user_agents,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
use_federation,
} = self; } = self;
if total_received == 0 { if total_received == 0 {
@ -1258,9 +1218,6 @@ impl MultiSearchAggregator {
"scoring": { "scoring": {
"show_ranking_score": show_ranking_score, "show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details, "show_ranking_score_details": show_ranking_score_details,
},
"federation": {
"use_federation": use_federation,
} }
}); });
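For reference, a minimal sketch of the analytics properties this `into_event` hunk assembles. It covers only the fields visible above (`scoring` and `federation`); the event carries more keys that are outside this excerpt.

```rust
use serde_json::{json, Value};

// Sketch only: just the fields shown in the hunk above; other properties of the
// multi-search event are not visible in this excerpt and are omitted here.
fn scoring_and_federation_properties(
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    use_federation: bool,
) -> Value {
    json!({
        "scoring": {
            "show_ranking_score": show_ranking_score,
            "show_ranking_score_details": show_ranking_score_details,
        },
        "federation": {
            "use_federation": use_federation,
        },
    })
}
```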
@ -1509,75 +1466,6 @@ impl DocumentsAggregator {
} }
} }
#[derive(Default)]
pub struct EditDocumentsByFunctionAggregator {
timestamp: Option<OffsetDateTime>,
// Set to true if at least one request was filtered
filtered: bool,
// Set to true if at least one request contained a context
with_context: bool,
// context
user_agents: HashSet<String>,
index_creation: bool,
}
impl EditDocumentsByFunctionAggregator {
pub fn from_query(
documents_query: &DocumentEditionByFunction,
index_creation: bool,
request: &HttpRequest,
) -> Self {
let DocumentEditionByFunction { filter, context, function: _ } = documents_query;
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
filtered: filter.is_some(),
with_context: context.is_some(),
index_creation,
}
}
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
let Self { timestamp, user_agents, index_creation, filtered, with_context } = other;
if self.timestamp.is_none() {
self.timestamp = timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in user_agents {
self.user_agents.insert(user_agent);
}
self.index_creation |= index_creation;
self.filtered |= filtered;
self.with_context |= with_context;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
let Self { timestamp, user_agents, index_creation, filtered, with_context } = self;
let properties = json!({
"user-agent": user_agents,
"filtered": filtered,
"with_context": with_context,
"index_creation": index_creation,
});
Some(Track {
timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
#[derive(Default, Serialize)] #[derive(Default, Serialize)]
pub struct DocumentsDeletionAggregator { pub struct DocumentsDeletionAggregator {
#[serde(skip)] #[serde(skip)]

@ -1,6 +1,6 @@
use actix_web as aweb; use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError}; use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::{Byte, UnitType}; use byte_unit::Byte;
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@ -27,17 +27,13 @@ pub enum MeilisearchHttpError {
EmptyFilter, EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value), InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
PaginationInFederatedQuery(usize, &'static str),
#[error("A {0} payload is missing.")] #[error("A {0} payload is missing.")]
MissingPayload(PayloadType), MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")] #[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
TooManySearchRequests(usize), TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")] #[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown, SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))] #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
PayloadTooLarge(usize), PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.", #[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len() .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
@ -90,12 +86,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::DocumentFormat(e) => e.error_code(), MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal, MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid, MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
Code::InvalidMultiSearchFederationOptions
}
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
Code::InvalidMultiSearchQueryPagination
}
} }
} }
} }
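For context on the `byte-unit` change in the `PayloadTooLarge` message above, a minimal standalone sketch of the 5.x call chain, `Byte::from_u64(..).get_appropriate_unit(UnitType::Binary)`, which replaces the 4.x form `Byte::from_bytes(..).get_appropriate_unit(true)`; the function name is made up for illustration.

```rust
// Minimal sketch, assuming byte-unit 5.x as pinned in the Cargo.toml hunk above.
use byte_unit::{Byte, UnitType};

fn payload_too_large_message(limit: usize) -> String {
    format!(
        "The provided payload reached the size limit. The maximum accepted payload size is {}.",
        Byte::from_u64(limit as u64).get_appropriate_unit(UnitType::Binary)
    )
}

// payload_too_large_message(10 * 1024 * 1024) should mention "10 MiB".
```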

@ -15,7 +15,6 @@ use std::fs::File;
use std::io::{BufReader, BufWriter}; use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize; use std::num::NonZeroUsize;
use std::path::Path; use std::path::Path;
use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use std::thread::{self, available_parallelism}; use std::thread::{self, available_parallelism};
use std::time::Duration; use std::time::Duration;
@ -24,13 +23,13 @@ use actix_cors::Cors;
use actix_http::body::MessageBody; use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse}; use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError; use actix_web::error::JsonPayloadError;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest}; use actix_web::{web, HttpRequest};
use analytics::Analytics; use analytics::Analytics;
use anyhow::bail; use anyhow::bail;
use error::PayloadError; use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
@ -168,7 +167,7 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
let conn_info = request.connection_info(); let conn_info = request.connection_info();
let headers = request.headers(); let headers = request.headers();
let user_agent = headers let user_agent = headers
.get(USER_AGENT) .get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned()) .map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default(); .unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty) info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
@ -301,15 +300,15 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(), dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()), webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(), webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize, task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?, indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true, autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters, cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT, index_count: DEFAULT_INDEX_COUNT,
instance_features, instance_features,
})?) })?)
@ -477,7 +476,7 @@ pub fn configure_data(
opt.experimental_search_queue_size, opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
); );
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config config
.app_data(index_scheduler) .app_data(index_scheduler)
.app_data(auth) .app_data(auth)

@ -151,7 +151,7 @@ async fn run_http(
.keep_alive(KeepAlive::Os); .keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? { if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?; http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
} else { } else {
http_server.bind(&opt_clone.http_addr)?.run().await?; http_server.bind(&opt_clone.http_addr)?.run().await?;
} }

@ -9,14 +9,16 @@ use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use std::{env, fmt, fs}; use std::{env, fmt, fs};
use byte_unit::{Byte, ParseError, UnitType}; use byte_unit::{Byte, ByteError};
use clap::Parser; use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures; use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::ThreadPoolNoAbortBuilder; use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier}; use rustls::server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
};
use rustls::RootCertStore; use rustls::RootCertStore;
use rustls_pemfile::{certs, rsa_private_keys}; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use sysinfo::{MemoryRefreshKind, RefreshKind, System};
use url::Url; use url::Url;
@ -52,7 +54,6 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE"; const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE"; const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
@ -338,13 +339,6 @@ pub struct Opt {
#[serde(default)] #[serde(default)]
pub log_level: LogLevel, pub log_level: LogLevel,
/// Experimental contains filter feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/763>
///
/// Enables the experimental contains filter operator.
#[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)]
#[serde(default)]
pub experimental_contains_filter: bool,
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518> /// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
/// ///
/// Enables the Prometheus metrics on the `GET /metrics` endpoint. /// Enables the Prometheus metrics on the `GET /metrics` endpoint.
@ -489,7 +483,6 @@ impl Opt {
config_file_path: _, config_file_path: _,
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
no_analytics, no_analytics,
experimental_contains_filter,
experimental_enable_metrics, experimental_enable_metrics,
experimental_search_queue_size, experimental_search_queue_size,
experimental_logs_mode, experimental_logs_mode,
@ -547,10 +540,6 @@ impl Opt {
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir); export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string()); export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_CONTAINS_FILTER,
experimental_contains_filter.to_string(),
);
export_to_env_if_not_present( export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_METRICS, MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(), experimental_enable_metrics.to_string(),
@ -580,21 +569,23 @@ impl Opt {
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> { pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let config = rustls::ServerConfig::builder(); let config = rustls::ServerConfig::builder().with_safe_defaults();
let config = match &self.ssl_auth_path { let config = match &self.ssl_auth_path {
Some(auth_path) => { Some(auth_path) => {
let roots = load_certs(auth_path.to_path_buf())?; let roots = load_certs(auth_path.to_path_buf())?;
let mut client_auth_roots = RootCertStore::empty(); let mut client_auth_roots = RootCertStore::empty();
for root in roots { for root in roots {
client_auth_roots.add(root).unwrap(); client_auth_roots.add(&root).unwrap();
} }
let mut client_verifier = if self.ssl_require_auth {
WebPkiClientVerifier::builder(client_auth_roots.into()); let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
if !self.ssl_require_auth { config.with_client_cert_verifier(Arc::from(verifier))
client_verifier = client_verifier.allow_unauthenticated(); } else {
let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(Arc::from(verifier))
} }
config.with_client_cert_verifier(client_verifier.build()?)
} }
None => config.with_no_client_auth(), None => config.with_no_client_auth(),
}; };
@ -603,7 +594,7 @@ impl Opt {
let privkey = load_private_key(key_path.to_path_buf())?; let privkey = load_private_key(key_path.to_path_buf())?;
let ocsp = load_ocsp(&self.ssl_ocsp_path)?; let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
let mut config = config let mut config = config
.with_single_cert_with_ocsp(certs, privkey, ocsp) .with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?; .map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
config.key_log = Arc::new(rustls::KeyLogFile::new()); config.key_log = Arc::new(rustls::KeyLogFile::new());
@ -613,7 +604,7 @@ impl Opt {
} }
if self.ssl_tickets { if self.ssl_tickets {
config.ticketer = rustls::crypto::ring::Ticketer::new().unwrap(); config.ticketer = rustls::Ticketer::new().unwrap();
} }
Ok(Some(config)) Ok(Some(config))
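As a side note on the rustls 0.21 versus 0.23 split visible in this hunk: a minimal sketch of the 0.23-style server config (no `.with_safe_defaults()` step, certificate and key types from `rustls::pki_types`). This is an illustrative reduction, not the full `get_ssl_config` above.

```rust
// Sketch only, assuming rustls 0.23 with the "ring" feature (as in the Cargo.toml hunk):
// builder() picks up the ring provider, and cert/key material uses the pki-types wrappers.
use rustls::pki_types::{CertificateDer, PrivateKeyDer};

fn minimal_server_config(
    certs: Vec<CertificateDer<'static>>,
    key: PrivateKeyDer<'static>,
) -> Result<rustls::ServerConfig, rustls::Error> {
    rustls::ServerConfig::builder()
        .with_no_client_auth()
        .with_single_cert(certs, key)
}
```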
@ -626,7 +617,6 @@ impl Opt {
InstanceTogglableFeatures { InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics, metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route, logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
} }
} }
} }
@ -684,7 +674,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self { Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N), log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize), max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool), thread_pool: Some(thread_pool),
max_positions_per_attributes: None, max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget, skip_index_budget: other.skip_index_budget,
@ -698,25 +688,23 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
pub struct MaxMemory(Option<Byte>); pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory { impl FromStr for MaxMemory {
type Err = ParseError; type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, Self::Err> { fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory) Byte::from_str(s).map(Some).map(MaxMemory)
} }
} }
impl Default for MaxMemory { impl Default for MaxMemory {
fn default() -> MaxMemory { fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64)) MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
} }
} }
impl fmt::Display for MaxMemory { impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 { match self.0 {
Some(memory) => { Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
write!(f, "{}", memory.get_appropriate_unit(UnitType::Binary))
}
None => f.write_str("unknown"), None => f.write_str("unknown"),
} }
} }
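The `MaxMemory` default in this hunk is two thirds of the machine's RAM. A hedged sketch of that computation follows; `total_memory_bytes` itself is not shown in this excerpt, so the sysinfo-based stand-in below is an assumption built from the `use sysinfo::{MemoryRefreshKind, RefreshKind, System}` import earlier in the file.

```rust
// Illustrative stand-in, assuming sysinfo 0.30 (total_memory() reports bytes).
use byte_unit::Byte;
use sysinfo::{MemoryRefreshKind, RefreshKind, System};

fn total_memory_bytes() -> Option<u64> {
    if sysinfo::IS_SUPPORTED_SYSTEM {
        let refresh = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
        Some(System::new_with_specifics(refresh).total_memory())
    } else {
        None
    }
}

// Mirrors the Default impl above: two thirds of total RAM, or None when it cannot be read.
fn default_max_memory() -> Option<Byte> {
    total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64)
}
```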
@ -779,26 +767,21 @@ impl Deref for MaxThreads {
} }
} }
fn load_certs( fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
filename: PathBuf,
) -> anyhow::Result<Vec<rustls::pki_types::CertificateDer<'static>>> {
let certfile = let certfile =
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
let mut reader = BufReader::new(certfile); let mut reader = BufReader::new(certfile);
certs(&mut reader) certs(&mut reader)
.collect::<Result<Vec<_>, _>>() .map(|certs| certs.into_iter().map(rustls::Certificate).collect())
.map_err(|_| anyhow::anyhow!("cannot read certificate file")) .map_err(|_| anyhow::anyhow!("cannot read certificate file"))
} }
fn load_private_key( fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
filename: PathBuf,
) -> anyhow::Result<rustls::pki_types::PrivateKeyDer<'static>> {
let rsa_keys = { let rsa_keys = {
let keyfile = fs::File::open(filename.clone()) let keyfile = fs::File::open(filename.clone())
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?; .map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile); let mut reader = BufReader::new(keyfile);
rsa_private_keys(&mut reader) rsa_private_keys(&mut reader)
.collect::<Result<Vec<_>, _>>()
.map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))? .map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
}; };
@ -806,21 +789,19 @@ fn load_private_key(
let keyfile = fs::File::open(filename) let keyfile = fs::File::open(filename)
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?; .map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile); let mut reader = BufReader::new(keyfile);
rustls_pemfile::pkcs8_private_keys(&mut reader).collect::<Result<Vec<_>, _>>().map_err( pkcs8_private_keys(&mut reader).map_err(|_| {
|_| { anyhow::anyhow!(
anyhow::anyhow!( "file contains invalid pkcs8 private key (encrypted keys not supported)"
"file contains invalid pkcs8 private key (encrypted keys not supported)" )
) })?
},
)?
}; };
// prefer to load pkcs8 keys // prefer to load pkcs8 keys
if !pkcs8_keys.is_empty() { if !pkcs8_keys.is_empty() {
Ok(rustls::pki_types::PrivateKeyDer::Pkcs8(pkcs8_keys[0].clone_key())) Ok(rustls::PrivateKey(pkcs8_keys[0].clone()))
} else { } else {
assert!(!rsa_keys.is_empty()); assert!(!rsa_keys.is_empty());
Ok(rustls::pki_types::PrivateKeyDer::Pkcs1(rsa_keys[0].clone_key())) Ok(rustls::PrivateKey(rsa_keys[0].clone()))
} }
} }
@ -863,11 +844,11 @@ fn default_env() -> String {
} }
fn default_max_index_size() -> Byte { fn default_max_index_size() -> Byte {
Byte::from_u64(INDEX_SIZE) Byte::from_bytes(INDEX_SIZE)
} }
fn default_max_task_db_size() -> Byte { fn default_max_task_db_size() -> Byte {
Byte::from_u64(TASK_DB_SIZE) Byte::from_bytes(TASK_DB_SIZE)
} }
fn default_http_payload_size_limit() -> Byte { fn default_http_payload_size_limit() -> Byte {

@ -47,10 +47,6 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>, pub metrics: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub logs_route: Option<bool>, pub logs_route: Option<bool>,
#[deserr(default)]
pub edit_documents_by_function: Option<bool>,
#[deserr(default)]
pub contains_filter: Option<bool>,
} }
async fn patch_features( async fn patch_features(
@ -70,23 +66,13 @@ async fn patch_features(
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics), metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route), logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
edit_documents_by_function: new_features
.0
.edit_documents_by_function
.unwrap_or(old_features.edit_documents_by_function),
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics. // the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures { let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
vector_store, new_features;
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
} = new_features;
analytics.publish( analytics.publish(
"Experimental features Updated".to_string(), "Experimental features Updated".to_string(),
@ -94,8 +80,6 @@ async fn patch_features(
"vector_store": vector_store, "vector_store": vector_store,
"metrics": metrics, "metrics": metrics,
"logs_route": logs_route, "logs_route": logs_route,
"edit_documents_by_function": edit_documents_by_function,
"contains_filter": contains_filter,
}), }),
Some(&req), Some(&req),
); );
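For the handler above, a hedged example of a request body that toggles the features removed on one side of this diff; the camelCase field names follow `RuntimeTogglableFeatures`, and the route path (`PATCH /experimental-features`) is an assumption, since the filename is not visible here.

```rust
// Hypothetical PATCH body; null fields deserialize to None and leave the old value untouched.
use serde_json::{json, Value};

fn enable_experimental_features_body() -> Value {
    json!({
        "vectorStore": true,
        "metrics": null,
        "logsRoute": null,
        "editDocumentsByFunction": true,
        "containsFilter": true
    })
}
```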

@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr; use deserr::Deserr;
use futures::StreamExt; use futures::StreamExt;
use index_scheduler::{IndexScheduler, RoFeatures, TaskId}; use index_scheduler::{IndexScheduler, TaskId};
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@ -82,7 +82,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))), web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
) )
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter)))) .service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
.service(web::resource("/edit").route(web::post().to(SeqHandler(edit_documents_by_function))))
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post)))) .service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
.service( .service(
web::resource("/{document_id}") web::resource("/{document_id}")
@ -260,15 +259,8 @@ fn documents_by_query(
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?; let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents( let (total, documents) =
&index, retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
offset,
limit,
filter,
fields,
retrieve_vectors,
index_scheduler.features(),
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents); let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -312,11 +304,7 @@ pub async fn replace_documents(
debug!(parameters = ?params, "Replace documents"); debug!(parameters = ?params, "Replace documents");
let params = params.into_inner(); let params = params.into_inner();
analytics.add_documents( analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
&req,
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?; let uid = get_task_id(&req, &opt)?;
@ -353,11 +341,7 @@ pub async fn update_documents(
let params = params.into_inner(); let params = params.into_inner();
debug!(parameters = ?params, "Update documents"); debug!(parameters = ?params, "Update documents");
analytics.add_documents( analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
&req,
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?; let uid = get_task_id(&req, &opt)?;
@ -572,9 +556,11 @@ pub async fn delete_documents_by_filter(
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
// we ensure the filter is well formed before enqueuing it // we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? || -> Result<_, ResponseError> {
.ok_or(MeilisearchHttpError::EmptyFilter)?; Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let uid = get_task_id(&req, &opt)?; let uid = get_task_id(&req, &opt)?;
@ -588,83 +574,6 @@ pub async fn delete_documents_by_filter(
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentEditionByFunction {
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentEditionContext>)]
pub context: Option<Value>,
#[deserr(error = DeserrJsonError<InvalidDocumentEditionFunctionFilter>, missing_field_error = DeserrJsonError::missing_document_edition_function)]
pub function: String,
}
pub async fn edit_documents_by_function(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Edit documents by function");
index_scheduler
.features()
.check_edit_documents_by_function("Using the documents edit route")?;
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let params = params.into_inner();
analytics.update_documents_by_function(
&params,
index_scheduler.index(&index_uid).is_err(),
&req,
);
let DocumentEditionByFunction { filter, context, function } = params;
let engine = milli::rhai::Engine::new();
if let Err(e) = engine.compile(&function) {
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
}
if let Some(ref filter) = filter {
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(
filter,
Code::InvalidDocumentFilter,
index_scheduler.features(),
)?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
}
let task = KindWithContent::DocumentEdition {
index_uid,
filter_expr: filter,
context: match context {
Some(Value::Object(m)) => Some(m),
None => None,
_ => {
return Err(ResponseError::from_msg(
"The context must be an object".to_string(),
Code::InvalidDocumentEditionContext,
))
}
},
function,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Edit documents by function");
Ok(HttpResponse::Accepted().json(task))
}
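
Before enqueuing a `DocumentEdition` task, the handler above compiles the user-supplied function so syntax errors are rejected up front. A sketch of that pre-flight check, assuming the plain `rhai` crate rather than the engine re-exported through milli, and with an invented edition function:

fn main() {
    let engine = rhai::Engine::new();
    // Hypothetical edition function; only its syntax is checked here.
    let function = r#"doc.title = doc.title.to_upper()"#;

    match engine.compile(function) {
        Ok(_ast) => println!("function compiles, the task can be enqueued"),
        Err(err) => println!("bad request: {err}"),
    }
}
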
pub async fn clear_all_documents( pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>, index_uid: web::Path<String>,
@ -706,8 +615,6 @@ fn some_documents<'a, 't: 'a>(
document.remove("_vectors"); document.remove("_vectors");
} }
RetrieveVectors::Retrieve => { RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") { let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map, Some(Value::Object(map)) => map,
_ => Default::default(), _ => Default::default(),
@ -742,12 +649,12 @@ fn retrieve_documents<S: AsRef<str>>(
filter: Option<Value>, filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>, attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<(u64, Vec<Document>), ResponseError> { ) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let filter = &filter; let filter = &filter;
let filter = if let Some(filter) = filter { let filter = if let Some(filter) = filter {
parse_filter(filter, Code::InvalidDocumentFilter, features)? parse_filter(filter)
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
} else { } else {
None None
}; };

View File

@ -79,14 +79,7 @@ pub async fn search(
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search( perform_facet_search(&index, search_query, facet_query, facet_name, search_kind)
&index,
search_query,
facet_query,
facet_name,
search_kind,
index_scheduler.features(),
)
}) })
.await?; .await?;

View File

@ -231,7 +231,7 @@ pub async fn search_with_url_query(
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) perform_search(&index, query, search_kind, retrieve_vector)
}) })
.await?; .await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
@ -274,7 +274,7 @@ pub async fn search_with_post(
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) perform_search(&index, query, search_kind, retrieve_vectors)
}) })
.await?; .await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
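
Both handlers above run `perform_search` through `tokio::task::spawn_blocking` because ranking is CPU-bound and would otherwise stall the async executor. A minimal sketch of that pattern, with a hypothetical `heavy_search` standing in for the real call:

fn heavy_search(query: &str) -> usize {
    // placeholder for the real, CPU-bound search
    query.len()
}

#[tokio::main]
async fn main() {
    let query = "shifu".to_string();
    // Move the blocking work onto tokio's dedicated blocking thread pool,
    // then await its JoinHandle; the routes unwrap both the join error and
    // the search error with the `??`-style handling shown above.
    let hits = tokio::task::spawn_blocking(move || heavy_search(&query))
        .await
        .expect("blocking task panicked");
    println!("{hits} candidate hits");
}
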

View File

@ -106,14 +106,7 @@ async fn similar(
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
perform_similar( perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
&index,
query,
embedder_name,
embedder,
retrieve_vectors,
index_scheduler.features(),
)
}) })
.await? .await?
} }

View File

@ -10,14 +10,12 @@ use serde::Serialize;
use tracing::debug; use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind; use crate::routes::indexes::search::search_kind;
use crate::search::{ use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors, add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
SearchQueryWithIndex, SearchResultWithIndex,
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
@ -30,44 +28,85 @@ struct SearchResults {
results: Vec<SearchResultWithIndex>, results: Vec<SearchResultWithIndex>,
} }
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueries {
queries: Vec<SearchQueryWithIndex>,
}
pub async fn multi_search_with_post( pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>, search_queue: Data<SearchQueue>,
params: AwebJson<FederatedSearch, DeserrJsonError>, params: AwebJson<SearchQueries, DeserrJsonError>,
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features();
// Since we don't want to process half of the search requests and then get a permit refused // Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request. // we're going to get one permit for the whole duration of the multi-search request.
let _permit = search_queue.try_get_search_permit().await?; let _permit = search_queue.try_get_search_permit().await?;
let federated_search = params.into_inner(); // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
let FederatedSearch { mut queries, federation } = federated_search;
let features = index_scheduler.features();
// regardless of federation, check authorization and apply search rules
let auth = 'check_authorization: {
for (query_index, federated_query) in queries.iter_mut().enumerate() {
let index_uid = federated_query.index_uid.as_str();
// Check index from API key // Check index from API key
if !index_scheduler.filters().is_index_authorized(index_uid) { if !index_scheduler.filters().is_index_authorized(&index_uid) {
break 'check_authorization Err(AuthenticationError::InvalidToken) return Err(AuthenticationError::InvalidToken).with_index(query_index);
.with_index(query_index);
} }
// Apply search rules from tenant token // Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid) if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
{ {
add_search_rules(&mut federated_query.filter, search_rules); add_search_rules(&mut query.filter, search_rules);
} }
}
Ok(())
};
auth.map_err(|(mut err, query_index)| { let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector =
RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector)
})
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
Ok(search_results)
}
.await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message. // Add the query index that failed as context for the error message.
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
// of result and we can benefit from static typing. // of result and we can benefit from static typing.
@ -75,95 +114,9 @@ pub async fn multi_search_with_post(
err err
})?; })?;
let response = match federation { debug!(returns = ?search_results, "Multi-search");
Some(federation) => {
let search_result = tokio::task::spawn_blocking(move || {
perform_federated_search(&index_scheduler, queries, federation, features)
})
.await;
if let Ok(Ok(_)) = search_result { Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
HttpResponse::Ok().json(search_result??)
}
None => {
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, query, federation_options)) in queries
.into_iter()
.map(SearchQueryWithIndex::into_index_query_federation)
.enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
if federation_options.is_some() {
return Err((
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
query_index,
)
.into(),
query_index,
));
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, features)
})
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
Ok(search_results)
}
.await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
// of result and we can benefit from static typing.
err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
err
})?;
debug!(returns = ?search_results, "Multi-search");
HttpResponse::Ok().json(SearchResults { results: search_results })
}
};
Ok(response)
} }
/// Local `Result` extension trait to avoid `map_err` boilerplate. /// Local `Result` extension trait to avoid `map_err` boilerplate.
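
The `with_index` calls above rely on the `WithIndex` extension trait referenced by this doc comment: it pairs an error with the position of the query that produced it, so the handler can build the "Inside `.queries[N]`" message in one place. A self-contained sketch of that idea (the exact signatures in Meilisearch may differ):

trait WithIndex<T, E> {
    fn with_index(self, index: usize) -> Result<T, (E, usize)>;
}

impl<T, E> WithIndex<T, E> for Result<T, E> {
    fn with_index(self, index: usize) -> Result<T, (E, usize)> {
        // Attach the failing query's position to the error.
        self.map_err(|err| (err, index))
    }
}

fn main() {
    let res: Result<u32, String> = Err("Index `movies` not found.".to_string());
    if let Err((err, query_index)) = res.with_index(2) {
        println!("Inside `.queries[{query_index}]`: {err}");
    }
}
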

View File

@ -591,7 +591,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err(); let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###" snapshot!(meili_snap::json_string!(err), @r###"
{ {
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types", "code": "invalid_task_types",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types" "link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -1,13 +1,12 @@
use core::fmt; use core::fmt;
use std::cmp::min; use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use deserr::Deserr; use deserr::Deserr;
use either::Either; use either::Either;
use index_scheduler::RoFeatures;
use indexmap::IndexMap; use indexmap::IndexMap;
use meilisearch_auth::IndexSearchRules; use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
@ -32,11 +31,6 @@ use serde_json::{json, Value};
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
mod federated;
pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
mod ranking_rules;
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>; type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
@ -263,13 +257,11 @@ pub struct HybridQuery {
pub embedder: Option<String>, pub embedder: Option<String>,
} }
#[derive(Clone)]
pub enum SearchKind { pub enum SearchKind {
KeywordOnly, KeywordOnly,
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> }, SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 }, Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
} }
impl SearchKind { impl SearchKind {
pub(crate) fn semantic( pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler, index_scheduler: &index_scheduler::IndexScheduler,
@ -366,7 +358,7 @@ impl SearchQuery {
} }
} }
/// A `SearchQuery` + an index UID and optional FederationOptions. /// A `SearchQuery` + an index UID.
// This struct contains the fields of `SearchQuery` inline. // This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields`. // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields`.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date. // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
@ -381,10 +373,10 @@ pub struct SearchQueryWithIndex {
pub vector: Option<Vec<f32>>, pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)] #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>, pub hybrid: Option<HybridQuery>,
#[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)] #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: Option<usize>, pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)] #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: Option<usize>, pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)] #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>, pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)] #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
@ -425,33 +417,12 @@ pub struct SearchQueryWithIndex {
pub attributes_to_search_on: Option<Vec<String>>, pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>, pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
} }
impl SearchQueryWithIndex { impl SearchQueryWithIndex {
pub fn has_federation_options(&self) -> bool { pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
self.federation_options.is_some()
}
pub fn has_pagination(&self) -> Option<&'static str> {
if self.offset.is_some() {
Some("offset")
} else if self.limit.is_some() {
Some("limit")
} else if self.page.is_some() {
Some("page")
} else if self.hits_per_page.is_some() {
Some("hitsPerPage")
} else {
None
}
}
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex { let SearchQueryWithIndex {
index_uid, index_uid,
federation_options,
q, q,
vector, vector,
offset, offset,
@ -483,8 +454,8 @@ impl SearchQueryWithIndex {
SearchQuery { SearchQuery {
q, q,
vector, vector,
offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()), offset,
limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()), limit,
page, page,
hits_per_page, hits_per_page,
attributes_to_retrieve, attributes_to_retrieve,
@ -509,7 +480,6 @@ impl SearchQueryWithIndex {
// do not use ..Default::default() here, // do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
}, },
federation_options,
) )
} }
} }
@ -762,8 +732,7 @@ fn prepare_search<'t>(
query: &'t SearchQuery, query: &'t SearchQuery,
search_kind: &SearchKind, search_kind: &SearchKind,
time_budget: TimeBudget, time_budget: TimeBudget,
features: RoFeatures, ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
let mut search = index.search(rtxn); let mut search = index.search(rtxn);
search.time_budget(time_budget); search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold { if let Some(ranking_score_threshold) = query.ranking_score_threshold {
@ -850,7 +819,7 @@ fn prepare_search<'t>(
search.limit(limit); search.limit(limit);
if let Some(ref filter) = query.filter { if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter, Code::InvalidSearchFilter, features)? { if let Some(facets) = parse_filter(filter)? {
search.filter(facets); search.filter(facets);
} }
} }
@ -874,8 +843,7 @@ pub fn perform_search(
query: SearchQuery, query: SearchQuery,
search_kind: SearchKind, search_kind: SearchKind,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
features: RoFeatures, ) -> Result<SearchResult, MeilisearchHttpError> {
) -> Result<SearchResult, ResponseError> {
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? { let time_budget = match index.search_cutoff(&rtxn)? {
@ -884,7 +852,7 @@ pub fn perform_search(
}; };
let (search, is_finite_pagination, max_total_hits, offset) = let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?; prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
let ( let (
milli::SearchResult { milli::SearchResult {
@ -896,7 +864,15 @@ pub fn perform_search(
used_negative_operator, used_negative_operator,
}, },
semantic_hit_count, semantic_hit_count,
) = search_from_kind(search_kind, search)?; ) = match &search_kind {
SearchKind::KeywordOnly => (search.execute()?, None),
SearchKind::SemanticOnly { .. } => {
let results = search.execute()?;
let semantic_hit_count = results.document_scores.len() as u32;
(results, Some(semantic_hit_count))
}
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
};
let SearchQuery { let SearchQuery {
q, q,
@ -943,13 +919,8 @@ pub fn perform_search(
show_ranking_score_details, show_ranking_score_details,
}; };
let documents = make_hits( let documents =
index, make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
&rtxn,
format,
matching_words,
documents_ids.iter().copied().zip(document_scores.iter()),
)?;
let number_of_hits = min(candidates.len() as usize, max_total_hits); let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = if is_finite_pagination { let hits_info = if is_finite_pagination {
@ -1017,22 +988,6 @@ pub fn perform_search(
Ok(result) Ok(result)
} }
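
`perform_search` above turns the index's `search_cutoff` (in milliseconds) into a `TimeBudget`; when the budget runs out the engine stops early and flags the result as degraded. A toy stand-in for that type, to make the mechanism concrete (the real `milli::TimeBudget` has a different API):

use std::time::{Duration, Instant};

// Toy stand-in for milli::TimeBudget.
struct TimeBudget {
    started: Instant,
    budget: Duration,
}

impl TimeBudget {
    fn new(budget: Duration) -> Self {
        Self { started: Instant::now(), budget }
    }
    fn exceeded(&self) -> bool {
        self.started.elapsed() > self.budget
    }
}

fn main() {
    let cutoff_ms = 150u64; // would come from index.search_cutoff(&rtxn)
    let budget = TimeBudget::new(Duration::from_millis(cutoff_ms));
    let mut degraded = false;

    for _bucket in 0..1_000 {
        if budget.exceeded() {
            degraded = true; // stop ranking early, return what we have
            break;
        }
        // stand-in for ranking one more bucket of candidates
        std::thread::sleep(Duration::from_millis(1));
    }
    println!("degraded: {degraded}");
}
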
pub fn search_from_kind(
search_kind: SearchKind,
search: milli::Search<'_>,
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
let (milli_result, semantic_hit_count) = match &search_kind {
SearchKind::KeywordOnly => (search.execute()?, None),
SearchKind::SemanticOnly { .. } => {
let results = search.execute()?;
let semantic_hit_count = results.document_scores.len() as u32;
(results, Some(semantic_hit_count))
}
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
};
Ok((milli_result, semantic_hit_count))
}
struct AttributesFormat { struct AttributesFormat {
attributes_to_retrieve: Option<BTreeSet<String>>, attributes_to_retrieve: Option<BTreeSet<String>>,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
@ -1078,191 +1033,129 @@ impl RetrieveVectors {
} }
} }
struct HitMaker<'a> { fn make_hits(
index: &'a Index, index: &Index,
rtxn: &'a RoTxn<'a>, rtxn: &RoTxn<'_>,
fields_ids_map: FieldsIdsMap, format: AttributesFormat,
displayed_ids: BTreeSet<FieldId>, matching_words: milli::MatchingWords,
vectors_fid: Option<FieldId>, documents_ids: Vec<u32>,
retrieve_vectors: RetrieveVectors, document_scores: Vec<Vec<ScoreDetails>>,
to_retrieve_ids: BTreeSet<FieldId>, ) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
embedding_configs: Vec<milli::index::IndexEmbeddingConfig>, let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
formatter_builder: MatcherBuilder<'a>, let displayed_ids =
formatted_options: BTreeMap<FieldId, FormatOptions>, index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
show_ranking_score: bool,
show_ranking_score_details: bool,
sort: Option<Vec<String>>,
show_matches_position: bool,
}
impl<'a> HitMaker<'a> { let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
pub fn tokenizer<'b>(
script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
dictionary: Option<&'b [&'b str]>,
separators: Option<&'b [&'b str]>,
) -> milli::tokenizer::Tokenizer<'b> {
let mut tokenizer_builder = TokenizerBuilder::default();
tokenizer_builder.create_char_map(true);
if !script_lang_map.is_empty() {
tokenizer_builder.allow_list(script_lang_map);
}
if let Some(separators) = separators { let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
tokenizer_builder.separators(separators); // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
} (None, _) => false,
// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
(Some(_), None) => true,
 // displayed_ids is a finite list, so hide if `_vectors` is not part of it
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
};
if let Some(dictionary) = dictionary { let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
tokenizer_builder.words_dict(dictionary); if vectors_is_hidden {
} RetrieveVectors::Hide
tokenizer_builder.into_tokenizer()
}
pub fn formatter_builder(
matching_words: milli::MatchingWords,
tokenizer: milli::tokenizer::Tokenizer<'_>,
) -> MatcherBuilder<'_> {
let formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
formatter_builder
}
pub fn new(
index: &'a Index,
rtxn: &'a RoTxn<'a>,
format: AttributesFormat,
mut formatter_builder: MatcherBuilder<'a>,
) -> Result<Self, MeilisearchHttpError> {
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let fields_ids_map = index.fields_ids_map(rtxn)?;
let displayed_ids = index
.displayed_fields_ids(rtxn)?
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
let vectors_fid =
fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
(None, _) => false,
// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
(Some(_), None) => true,
            // displayed_ids is a finite list, so hide if `_vectors` is not part of it
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
};
let displayed_ids =
displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
if vectors_is_hidden {
RetrieveVectors::Hide
} else {
RetrieveVectors::Retrieve
}
} else { } else {
format.retrieve_vectors RetrieveVectors::Retrieve
}; }
} else {
format.retrieve_vectors
};
let fids = |attrs: &BTreeSet<String>| { let displayed_ids =
let mut ids = BTreeSet::new(); displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
for attr in attrs { let fids = |attrs: &BTreeSet<String>| {
if attr == "*" { let mut ids = BTreeSet::new();
ids.clone_from(&displayed_ids); for attr in attrs {
break; if attr == "*" {
} ids.clone_from(&displayed_ids);
break;
if let Some(id) = fields_ids_map.id(attr) {
ids.insert(id);
}
} }
ids
};
let to_retrieve_ids: BTreeSet<_> = format
.attributes_to_retrieve
.as_ref()
.map(fids)
.unwrap_or_else(|| displayed_ids.clone())
.intersection(&displayed_ids)
.cloned()
.collect();
let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); if let Some(id) = fields_ids_map.id(attr) {
let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); ids.insert(id);
let formatted_options = compute_formatted_options( }
&attr_to_highlight, }
&attr_to_crop, ids
format.crop_length, };
&to_retrieve_ids, let to_retrieve_ids: BTreeSet<_> = format
&fields_ids_map, .attributes_to_retrieve
&displayed_ids, .as_ref()
); .map(fids)
.unwrap_or_else(|| displayed_ids.clone())
.intersection(&displayed_ids)
.cloned()
.collect();
let embedding_configs = index.embedding_configs(rtxn)?; let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
Ok(Self { let formatted_options = compute_formatted_options(
index, &attr_to_highlight,
rtxn, &attr_to_crop,
fields_ids_map, format.crop_length,
displayed_ids, &to_retrieve_ids,
vectors_fid, &fields_ids_map,
retrieve_vectors, &displayed_ids,
to_retrieve_ids, );
embedding_configs, let mut tokenizer_builder = TokenizerBuilder::default();
formatter_builder, tokenizer_builder.create_char_map(true);
formatted_options, let script_lang_map = index.script_language(rtxn)?;
show_ranking_score: format.show_ranking_score, if !script_lang_map.is_empty() {
show_ranking_score_details: format.show_ranking_score_details, tokenizer_builder.allow_list(&script_lang_map);
show_matches_position: format.show_matches_position,
sort: format.sort,
})
} }
let separators = index.allowed_separators(rtxn)?;
pub fn make_hit( let separators: Option<Vec<_>> =
&self, separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
id: u32, if let Some(ref separators) = separators {
score: &[ScoreDetails], tokenizer_builder.separators(separators);
) -> Result<SearchHit, MeilisearchHttpError> { }
let (_, obkv) = let dictionary = index.dictionary(rtxn)?;
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.documents(rtxn, documents_ids)?;
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
// First generate a document with all the displayed fields // First generate a document with all the displayed fields
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?; let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
let add_vectors_fid = let add_vectors_fid =
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve); vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
// select the attributes to retrieve // select the attributes to retrieve
let attributes_to_retrieve = self let attributes_to_retrieve = to_retrieve_ids
.to_retrieve_ids
.iter() .iter()
// skip the vectors_fid if RetrieveVectors::Hide // skip the vectors_fid if RetrieveVectors::Hide
.filter(|fid| match self.vectors_fid { .filter(|fid| match vectors_fid {
Some(vectors_fid) => { Some(vectors_fid) => {
!(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
} }
None => true, None => true,
}) })
// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve` // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
.chain(add_vectors_fid.iter()) .chain(add_vectors_fid.iter())
.map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name")); .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
let mut document = let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
if self.retrieve_vectors == RetrieveVectors::Retrieve { if retrieve_vectors == RetrieveVectors::Retrieve {
// Clippy is wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") { let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map, Some(Value::Object(map)) => map,
_ => Default::default(), _ => Default::default(),
}; };
for (name, vector) in self.index.embeddings(self.rtxn, id)? { for (name, vector) in index.embeddings(rtxn, id)? {
let user_provided = self let user_provided = embedding_configs
.embedding_configs
.iter() .iter()
.find(|conf| conf.name == name) .find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(id)); .is_some_and(|conf| conf.user_provided.contains(id));
@ -1275,21 +1168,21 @@ impl<'a> HitMaker<'a> {
let (matches_position, formatted) = format_fields( let (matches_position, formatted) = format_fields(
&displayed_document, &displayed_document,
&self.fields_ids_map, &fields_ids_map,
&self.formatter_builder, &formatter_builder,
&self.formatted_options, &formatted_options,
self.show_matches_position, format.show_matches_position,
&self.displayed_ids, &displayed_ids,
)?; )?;
if let Some(sort) = self.sort.as_ref() { if let Some(sort) = format.sort.as_ref() {
insert_geo_distance(sort, &mut document); insert_geo_distance(sort, &mut document);
} }
let ranking_score = let ranking_score =
self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
let ranking_score_details = let ranking_score_details =
self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
let hit = SearchHit { let hit = SearchHit {
document, document,
@ -1298,38 +1191,7 @@ impl<'a> HitMaker<'a> {
ranking_score_details, ranking_score_details,
ranking_score, ranking_score,
}; };
documents.push(hit);
Ok(hit)
}
}
fn make_hits<'a>(
index: &Index,
rtxn: &RoTxn<'_>,
format: AttributesFormat,
matching_words: milli::MatchingWords,
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
let mut documents = Vec::new();
let script_lang_map = index.script_language(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let separators = index.allowed_separators(rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
let tokenizer =
HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref());
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
for (id, score) in documents_ids_scores {
documents.push(hit_maker.make_hit(id, score)?);
} }
Ok(documents) Ok(documents)
} }
@ -1340,8 +1202,7 @@ pub fn perform_facet_search(
facet_query: Option<String>, facet_query: Option<String>,
facet_name: String, facet_name: String,
search_kind: SearchKind, search_kind: SearchKind,
features: RoFeatures, ) -> Result<FacetSearchResult, MeilisearchHttpError> {
) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? { let time_budget = match index.search_cutoff(&rtxn)? {
@ -1349,8 +1210,7 @@ pub fn perform_facet_search(
None => TimeBudget::default(), None => TimeBudget::default(),
}; };
let (search, _, _, _) = let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, &search_kind, time_budget)?;
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let mut facet_search = SearchForFacetValues::new( let mut facet_search = SearchForFacetValues::new(
facet_name, facet_name,
search, search,
@ -1376,7 +1236,6 @@ pub fn perform_similar(
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<SimilarResult, ResponseError> { ) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
@ -1407,7 +1266,10 @@ pub fn perform_similar(
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
if let Some(ref filter) = query.filter { if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { if let Some(facets) = parse_filter(filter)
// inject InvalidSimilarFilter code
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
{
similar.filter(facets); similar.filter(facets);
} }
} }
@ -1445,13 +1307,7 @@ pub fn perform_similar(
show_ranking_score_details, show_ranking_score_details,
}; };
let hits = make_hits( let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
index,
&rtxn,
format,
Default::default(),
documents_ids.iter().copied().zip(document_scores.iter()),
)?;
let max_total_hits = index let max_total_hits = index
.pagination_max_total_hits(&rtxn) .pagination_max_total_hits(&rtxn)
@ -1624,10 +1480,10 @@ fn make_document(
Ok(document) Ok(document)
} }
fn format_fields( fn format_fields<'a>(
document: &Document, document: &Document,
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
builder: &MatcherBuilder<'_>, builder: &'a MatcherBuilder<'a>,
formatted_options: &BTreeMap<FieldId, FormatOptions>, formatted_options: &BTreeMap<FieldId, FormatOptions>,
compute_matches: bool, compute_matches: bool,
displayable_ids: &BTreeSet<FieldId>, displayable_ids: &BTreeSet<FieldId>,
@ -1682,9 +1538,9 @@ fn format_fields(
Ok((matches_position, document)) Ok((matches_position, document))
} }
fn format_value( fn format_value<'a>(
value: Value, value: Value,
builder: &MatcherBuilder<'_>, builder: &'a MatcherBuilder<'a>,
format_options: Option<FormatOptions>, format_options: Option<FormatOptions>,
infos: &mut Vec<MatchBounds>, infos: &mut Vec<MatchBounds>,
compute_matches: bool, compute_matches: bool,
@ -1763,33 +1619,15 @@ fn format_value(
} }
} }
pub(crate) fn parse_filter( pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
facets: &Value, match facets {
filter_parsing_error_code: Code, Value::String(expr) => {
features: RoFeatures, let condition = Filter::from_str(expr)?;
) -> Result<Option<Filter>, ResponseError> { Ok(condition)
let filter = match facets {
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone()).into()),
};
let filter = filter.map_err(|err: ResponseError| {
ResponseError::from_msg(err.to_string(), filter_parsing_error_code)
})?;
if let Some(ref filter) = filter {
// If the contains operator is used while the contains filter features is not enabled, errors out
if let Some((token, error)) =
filter.use_contains_operator().zip(features.check_contains_filter().err())
{
return Err(ResponseError::from_msg(
token.as_external_error(error).to_string(),
Code::FeatureNotEnabled,
));
} }
Value::Array(arr) => parse_filter_array(arr),
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone())),
} }
Ok(filter)
} }
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> { fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {

View File

@ -1,629 +0,0 @@
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt;
use std::iter::Zip;
use std::rc::Rc;
use std::str::FromStr as _;
use std::time::Duration;
use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use super::ranking_rules::{self, RankingRules};
use super::{
prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederationOptions {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
pub weight: Weight,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);
impl Default for Weight {
fn default() -> Self {
Weight(DEFAULT_FEDERATED_WEIGHT)
}
}
impl std::convert::TryFrom<f64> for Weight {
type Error = InvalidMultiSearchWeight;
fn try_from(f: f64) -> Result<Self, Self::Error> {
if f < 0.0 {
Err(InvalidMultiSearchWeight)
} else {
Ok(Weight(f))
}
}
}
impl std::ops::Deref for Weight {
type Target = f64;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Federation {
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederatedSearch {
pub queries: Vec<SearchQueryWithIndex>,
#[deserr(default)]
pub federation: Option<Federation>,
}
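
Taken together, `FederatedSearch`, `Federation`, and the per-query `federation_options` describe a request body along these lines. This is a sketch built only from the struct definitions above (index names and the query text are invented; serde_json is used purely for illustration):

fn main() {
    let body = serde_json::json!({
        "federation": { "limit": 10, "offset": 0 },
        "queries": [
            { "indexUid": "movies", "q": "batman", "federationOptions": { "weight": 2.0 } },
            { "indexUid": "comics", "q": "batman" }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
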
#[derive(Serialize, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
}
impl fmt::Debug for FederatedSearchResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let FederatedSearchResult {
hits,
processing_time_ms,
hits_info,
semantic_hit_count,
degraded,
used_negative_operator,
} = self;
let mut debug = f.debug_struct("SearchResult");
// The most important thing when looking at a search result is the time it took to process
debug.field("processing_time_ms", &processing_time_ms);
debug.field("hits", &format!("[{} hits returned]", hits.len()));
debug.field("hits_info", &hits_info);
if *used_negative_operator {
debug.field("used_negative_operator", used_negative_operator);
}
if *degraded {
debug.field("degraded", degraded);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
debug.finish()
}
}
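
The manual `Debug` impl above deliberately logs a hit count instead of every hit, and hides flags that are false. The same pattern in a self-contained form, with an invented `Summary` type:

use std::fmt;

struct Summary {
    hits: Vec<String>,
    processing_time_ms: u128,
    degraded: bool,
}

impl fmt::Debug for Summary {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut debug = f.debug_struct("Summary");
        // the most important thing in a log line is how long the request took
        debug.field("processing_time_ms", &self.processing_time_ms);
        // summarize the hits instead of dumping them
        debug.field("hits", &format!("[{} hits returned]", self.hits.len()));
        // only show optional flags when they carry information
        if self.degraded {
            debug.field("degraded", &self.degraded);
        }
        debug.finish()
    }
}

fn main() {
    let s = Summary { hits: vec!["a".into(), "b".into()], processing_time_ms: 12, degraded: false };
    println!("{s:?}");
}
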
struct WeightedScore<'a> {
details: &'a [ScoreDetails],
weight: f64,
}
impl<'a> WeightedScore<'a> {
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
Self { details, weight }
}
pub fn weighted_global_score(&self) -> f64 {
ScoreDetails::global_score(self.details.iter()) * self.weight
}
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
self.weighted_global_score()
.partial_cmp(&other.weighted_global_score())
// both are numbers, possibly infinite
.unwrap()
}
pub fn compare(&self, other: &Self) -> Ordering {
let mut left_it = ScoreDetails::score_values(self.details.iter());
let mut right_it = ScoreDetails::score_values(other.details.iter());
loop {
let left = left_it.next();
let right = right_it.next();
match (left, right) {
(None, None) => return Ordering::Equal,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
let left = left * self.weight;
let right = right * other.weight;
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => return self.compare_weighted_global_scores(other),
}
}
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => {
return self.compare_weighted_global_scores(other);
}
}
}
// not comparable details, use global
(Some(ScoreValue::Score(_)), Some(_))
| (Some(_), Some(ScoreValue::Score(_)))
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
let left_count = left_it.count();
let right_count = right_it.count();
// compare how many remaining groups of rules each side has.
                    // the side with the most remaining groups wins.
return left_count
.cmp(&right_count)
// breaks ties with the global ranking score
.then_with(|| self.compare_weighted_global_scores(other));
}
}
}
}
}
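
To make the rule-by-rule weighted comparison above concrete, here is a toy version that only keeps the numeric case (real `ScoreDetails` also carry sort and geo values): scores are multiplied by each query's weight, near-equal values fall through to the next rule, and the first decisive rule wins.

use std::cmp::Ordering;

fn compare_weighted(left: &[f64], left_weight: f64, right: &[f64], right_weight: f64) -> Ordering {
    for (l, r) in left.iter().zip(right.iter()) {
        let (l, r) = (l * left_weight, r * right_weight);
        if (l - r).abs() <= f64::EPSILON {
            continue; // tie on this rule, look at the next one
        }
        return l.partial_cmp(&r).unwrap();
    }
    Ordering::Equal
}

fn main() {
    // The second query has weight 2.0, which flips the winner on the first rule:
    // 0.9 * 1.0 = 0.9 against 0.6 * 2.0 = 1.2.
    let ord = compare_weighted(&[0.9, 0.5], 1.0, &[0.6, 0.4], 2.0);
    println!("{ord:?}"); // Less
}
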
struct QueryByIndex {
query: SearchQuery,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByQuery<'a> {
documents_ids: Vec<DocumentId>,
document_scores: Vec<Vec<ScoreDetails>>,
federation_options: FederationOptions,
hit_maker: HitMaker<'a>,
query_index: usize,
}
struct SearchResultByQueryIter<'a> {
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
impl<'a> SearchResultByQueryIter<'a> {
fn new(
SearchResultByQuery {
documents_ids,
document_scores,
federation_options,
hit_maker,
query_index,
}: SearchResultByQuery<'a>,
) -> Self {
let it = documents_ids.into_iter().zip(document_scores);
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
}
}
struct SearchResultByQueryIterItem<'a> {
docid: DocumentId,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
fn merge_index_local_results(
results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
itertools::kmerge_by(
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
fn merge_index_global_results(
results_by_index: Vec<SearchResultByIndex>,
) -> impl Iterator<Item = SearchHitByIndex> {
itertools::kmerge_by(
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
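
Both merge functions above lean on `itertools::kmerge_by`, which lazily k-way merges iterators that are already sorted; the closure returns true when its left argument should come out first. A self-contained sketch over plain scores, assuming only the itertools crate:

use itertools::kmerge_by;

fn main() {
    // One descending score list per index, as produced by each per-index search.
    let per_index = vec![vec![0.9, 0.7, 0.2], vec![0.8, 0.5], vec![0.95, 0.1]];

    // "left goes first" when it is the bigger score, so the merged stream is
    // globally descending, mirroring the comparators above.
    let merged: Vec<f64> = kmerge_by(per_index, |left: &f64, right: &f64| left > right).collect();

    println!("{merged:?}"); // [0.95, 0.9, 0.8, 0.7, 0.5, 0.2, 0.1]
}
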
impl<'a> Iterator for SearchResultByQueryIter<'a> {
type Item = SearchResultByQueryIterItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
let (docid, score) = self.it.next()?;
Some(SearchResultByQueryIterItem {
docid,
score,
federation_options: self.federation_options,
hit_maker: Rc::clone(&self.hit_maker),
query_index: self.query_index,
})
}
}
struct SearchHitByIndex {
hit: SearchHit,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByIndex {
hits: Vec<SearchHitByIndex>,
candidates: RoaringBitmap,
degraded: bool,
used_negative_operator: bool,
}
pub fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now();
    // this implementation partitions the queries by index to guarantee an important property:
    // - all the queries to a particular index use the same read transaction.
    // This is an important property: otherwise we cannot guarantee the self-consistency of the results.
// 1. partition queries by index
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
for (query_index, federated_query) in queries.into_iter().enumerate() {
if let Some(pagination_field) = federated_query.has_pagination() {
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
query_index,
pagination_field,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
query,
federation_options: federation_options.unwrap_or_default(),
query_index,
})
}
// 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
for (index_uid, queries) in queries_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
if let Some(query) = queries.first() {
err.message =
format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
}
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
let criteria = index.criteria(&rtxn)?;
// stuff we need for the hitmaker
let script_lang_map = index.script_language(&rtxn)?;
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let separators = index.allowed_separators(&rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
// each query gets its individual cutoff
let cutoff = index.search_cutoff(&rtxn)?;
let mut degraded = false;
let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new();
// 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len());
for QueryByIndex { query, federation_options, query_index } in queries {
// use an immediately invoked lambda to capture the result without returning from the function
let res: Result<(), ResponseError> = (|| {
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
let canonicalization_kind = match (&search_kind, &query.q) {
(SearchKind::SemanticOnly { .. }, _) => {
ranking_rules::CanonicalizationKind::Vector
}
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
_ => ranking_rules::CanonicalizationKind::Placeholder,
};
let sort = if let Some(sort) = &query.sort {
let sorts: Vec<_> =
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
}
};
Some(sorts)
} else {
None
};
let ranking_rules = ranking_rules::RankingRules::new(
criteria.clone(),
sort,
query.matching_strategy.into(),
canonicalization_kind,
);
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
previous_query_data.take()
{
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
return Err(error.to_response_error(
&ranking_rules,
&previous_ranking_rules,
query_index,
previous_query_index,
&index_uid,
&previous_index_uid,
));
}
previous_query_data = if previous_ranking_rules.constraint_count()
> ranking_rules.constraint_count()
{
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
} else {
Some((ranking_rules, query_index, index_uid.clone()))
};
} else {
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
}
match search_kind {
SearchKind::KeywordOnly => {}
_ => semantic_hit_count = Some(0),
}
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let time_budget = match cutoff {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
search.offset(0);
search.limit(required_hit_count);
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
let format = AttributesFormat {
attributes_to_retrieve: query.attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight: query.attributes_to_highlight,
attributes_to_crop: query.attributes_to_crop,
crop_length: query.crop_length,
crop_marker: query.crop_marker,
highlight_pre_tag: query.highlight_pre_tag,
highlight_post_tag: query.highlight_post_tag,
show_matches_position: query.show_matches_position,
sort: query.sort,
show_ranking_score: query.show_ranking_score,
show_ranking_score_details: query.show_ranking_score_details,
};
let milli::SearchResult {
matching_words,
candidates: query_candidates,
documents_ids,
document_scores,
degraded: query_degraded,
used_negative_operator: query_used_negative_operator,
} = result;
candidates |= query_candidates;
degraded |= query_degraded;
used_negative_operator |= query_used_negative_operator;
let tokenizer = HitMaker::tokenizer(
&script_lang_map,
dictionary.as_deref(),
separators.as_deref(),
);
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
results_by_query.push(SearchResultByQuery {
federation_options,
hit_maker,
query_index,
documents_ids,
document_scores,
});
Ok(())
})();
if let Err(mut error) = res {
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
return Err(error);
}
}
// 2.2. merge inside index
let mut documents_seen = RoaringBitmap::new();
let merged_result: Result<Vec<_>, ResponseError> =
merge_index_local_results(results_by_query)
// skip documents we've already seen & mark that we saw the current document
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
.take(required_hit_count)
// 2.3 make hits
.map(
|SearchResultByQueryIterItem {
docid,
score,
federation_options,
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let weighted_score =
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
let _federation = serde_json::json!(
{
"indexUid": index_uid,
"queriesPosition": query_index,
"weightedRankingScore": weighted_score,
}
);
hit.document.insert("_federation".to_string(), _federation);
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
},
)
.collect();
let merged_result = merged_result?;
results_by_index.push(SearchResultByIndex {
hits: merged_result,
candidates,
degraded,
used_negative_operator,
});
}
// 3. merge hits and metadata across indexes
// 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator) = {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
for SearchResultByIndex {
hits: _,
candidates,
degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index,
} in &results_by_index
{
estimated_total_hits += candidates.len() as usize;
degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index;
}
(estimated_total_hits, degraded, used_negative_operator)
};
// 3.2 merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
.skip(federation.offset)
.take(federation.limit)
.inspect(|hit| {
if let Some(semantic_hit_count) = &mut semantic_hit_count {
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
*semantic_hit_count += 1;
}
}
})
.map(|hit| hit.hit)
.collect();
let search_result = FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
hits_info: HitsInfo::OffsetLimit {
limit: federation.limit,
offset: federation.offset,
estimated_total_hits,
},
semantic_hit_count,
degraded,
used_negative_operator,
};
Ok(search_result)
}
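As an illustrative aside (not part of the changeset): the function above is driven by a multi-search payload carrying a top-level `federation` object, as exercised by the authorization tests later in this diff. A minimal sketch of such a request follows; the `federation.limit`/`federation.offset` names mirror the fields read above, while `federationOptions.weight` is an assumption inferred from the `federation_options.weight` factor used in the weighted score.
use serde_json::json;

fn main() {
    // Hypothetical request body; index names and the `weight` knob are assumptions.
    let request = json!({
        "federation": { "limit": 20, "offset": 0 },
        "queries": [
            { "indexUid": "sales", "q": "apple" },
            { "indexUid": "products", "q": "apple", "federationOptions": { "weight": 0.5 } }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&request).unwrap());
}
Each merged hit then carries the `_federation` object built above, with `indexUid`, `queriesPosition`, and `weightedRankingScore`.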

View File

@ -1,823 +0,0 @@
use std::collections::HashMap;
use std::fmt::Write;
use itertools::Itertools as _;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
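/// Ranking rules of one federated query in canonical form, used to decide whether two
/// queries rank documents consistently enough for their results to be merged.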
pub struct RankingRules {
canonical_criteria: Vec<Criterion>,
canonical_sort: Option<Vec<AscDesc>>,
canonicalization_actions: Vec<CanonicalizationAction>,
source_criteria: Vec<Criterion>,
source_sort: Option<Vec<AscDesc>>,
}
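/// A rewrite applied during canonicalization; recorded so error messages can explain
/// how the original rules were transformed.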
pub enum CanonicalizationAction {
PrependedWords {
prepended_index: RankingRuleSource,
},
RemovedDuplicate {
earlier_occurrence: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedWords {
reason: RemoveWords,
removed_occurrence: RankingRuleSource,
},
RemovedPlaceholder {
removed_occurrence: RankingRuleSource,
},
TruncatedVector {
vector_rule: RankingRuleSource,
truncated_from: RankingRuleSource,
},
RemovedVector {
vector_rule: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedSort {
removed_occurrence: RankingRuleSource,
},
}
pub enum RemoveWords {
WasPrepended,
MatchingStrategyAll,
}
impl std::fmt::Display for RemoveWords {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let reason = match self {
RemoveWords::WasPrepended => "it was previously prepended",
RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
};
f.write_str(reason)
}
}
pub enum CanonicalizationKind {
Placeholder,
Keyword,
Vector,
}
pub struct CompatibilityError {
previous: RankingRule,
current: RankingRule,
}
impl CompatibilityError {
pub(crate) fn to_response_error(
&self,
ranking_rules: &RankingRules,
previous_ranking_rules: &RankingRules,
query_index: usize,
previous_query_index: usize,
index_uid: &str,
previous_index_uid: &str,
) -> meilisearch_types::error::ResponseError {
let rule = self.current.as_string(
&ranking_rules.canonical_criteria,
&ranking_rules.canonical_sort,
query_index,
index_uid,
);
let previous_rule = self.previous.as_string(
&previous_ranking_rules.canonical_criteria,
&previous_ranking_rules.canonical_sort,
previous_query_index,
previous_index_uid,
);
let canonicalization_actions = ranking_rules.canonicalization_notes();
let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
let mut msg = String::new();
let reason = self.reason();
let _ = writeln!(
&mut msg,
"The results of queries #{previous_query_index} and #{query_index} are incompatible: "
);
let _ = writeln!(&mut msg, " 1. {previous_rule}");
let _ = writeln!(&mut msg, " 2. {rule}");
let _ = writeln!(&mut msg, " - {reason}");
if !previous_canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
}
if !canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
}
ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
}
pub fn reason(&self) -> &'static str {
match (self.previous.kind, self.current.kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a sort rule"
}
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a geosort rule"
}
(RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
"cannot compare two sort rules in opposite directions"
}
(RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
"cannot compare a sort rule with a geosort rule"
}
(RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
"cannot compare two geosort rules in opposite directions"
}
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
| (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
"internal error, comparison should be possible"
}
}
}
}
impl RankingRules {
pub fn new(
criteria: Vec<Criterion>,
sort: Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> Self {
let (canonical_criteria, canonical_sort, canonicalization_actions) =
Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
Self {
canonical_criteria,
canonical_sort,
canonicalization_actions,
source_criteria: criteria,
source_sort: sort,
}
}
fn canonicalize(
criteria: &[Criterion],
sort: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
match canonicalization_kind {
CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
CanonicalizationKind::Keyword => {
Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
}
CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
}
}
fn canonicalize_placeholder(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;
let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => {
canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
fn canonicalize_vector(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;
let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
let mut vector = None;
'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => match vector {
Some(previous_occurrence) => {
if sorted_fields.is_empty() {
canonicalization_actions.push(CanonicalizationAction::RemovedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else {
canonicalization_actions.push(
CanonicalizationAction::TruncatedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
truncated_from: RankingRuleSource::Criterion(criterion_index),
},
);
break 'criteria;
}
}
None => {
canonical_criteria.push(criterion.clone());
vector = Some(criterion_index);
}
},
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
fn canonicalize_keyword(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut words = None;
let mut typo = None;
let mut proximity = None;
let mut sort = None;
let mut attribute = None;
let mut exactness = None;
let mut sorted_fields = HashMap::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
let mut canonicalization_actions = Vec::new();
for (criterion_index, criterion) in criteria.iter().enumerate() {
let criterion = criterion.clone();
match criterion.clone() {
Criterion::Words => {
if let TermsMatchingStrategy::All = terms_matching_strategy {
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::MatchingStrategyAll,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
continue;
}
if let Some(maybe_previous_index) = words {
if let Some(previous_index) = maybe_previous_index {
canonicalization_actions.push(
CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(
previous_index,
),
removed_occurrence: RankingRuleSource::Criterion(
criterion_index,
),
},
);
continue;
}
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::WasPrepended,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
words = Some(Some(criterion_index));
canonical_criteria.push(criterion);
}
Criterion::Typo => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut typo,
);
}
Criterion::Proximity => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut proximity,
);
}
Criterion::Attribute => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut attribute,
);
}
Criterion::Exactness => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut exactness,
);
}
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion);
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion)
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
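/// Two queries are compatible when their coalesced ranking rules agree kind-by-kind,
/// compared only up to the shorter of the two lists.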
pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
if current.kind != previous.kind {
return Err(CompatibilityError { current, previous });
}
}
Ok(())
}
pub fn constraint_count(&self) -> usize {
self.coalesce_iterator().count()
}
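// consecutive relevancy rules collapse into a single constraint, so their exact order
// does not affect compatibility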
fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
self.canonical_criteria
.iter()
.enumerate()
.flat_map(|(criterion_index, criterion)| {
RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
})
.coalesce(
|previous @ RankingRule { source: previous_source, kind: previous_kind },
current @ RankingRule { source, kind }| {
match (previous_kind, kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
let merged_source = match (previous_source, source) {
(
RankingRuleSource::Criterion(previous),
RankingRuleSource::Criterion(current),
) => RankingRuleSource::CoalescedCriteria(previous, current),
(
RankingRuleSource::CoalescedCriteria(begin, _end),
RankingRuleSource::Criterion(current),
) => RankingRuleSource::CoalescedCriteria(begin, current),
(_previous, current) => current,
};
Ok(RankingRule { source: merged_source, kind })
}
_ => Err((previous, current)),
}
},
)
}
fn canonicalization_notes(&self) -> String {
use CanonicalizationAction::*;
let mut notes = String::new();
for (index, action) in self.canonicalization_actions.iter().enumerate() {
let index = index + 1;
let _ = match action {
PrependedWords { prepended_index } => writeln!(
&mut notes,
" {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
prepended_index.rule_name(&self.source_criteria, &self.source_sort),
prepended_index.rule_position()
),
RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position(),
earlier_occurrence.rule_position(),
),
RemovedWords { reason, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed rule `words` at position {} because {reason}",
removed_occurrence.rule_position()
),
RemovedPlaceholder { removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position()
),
TruncatedVector { vector_rule, truncated_from } => writeln!(
&mut notes,
" {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
truncated_from.rule_name(&self.source_criteria, &self.source_sort),
truncated_from.rule_position(),
vector_rule.rule_position(),
),
RemovedVector { vector_rule, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position(),
vector_rule.rule_position(),
),
RemovedSort { removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed rule `sort` at position {} because `query.sort` is empty",
removed_occurrence.rule_position()
),
};
}
notes
}
}
fn canonicalize_sort(
sorted_fields: &mut HashMap<String, RankingRuleSource>,
sort_query: &[AscDesc],
criterion_index: usize,
canonicalization_actions: &mut Vec<CanonicalizationAction>,
) -> Vec<AscDesc> {
let mut geo_sorted = None;
let mut canonical_sort = Vec::new();
for (sort_index, asc_desc) in sort_query.iter().enumerate() {
let source = RankingRuleSource::Sort { criterion_index, sort_index };
let asc_desc = asc_desc.clone();
match asc_desc.clone() {
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: source,
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(source);
canonical_sort.push(asc_desc);
}
}
}
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
Some(earlier_sort_index) => {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Sort {
criterion_index,
sort_index: earlier_sort_index,
},
removed_occurrence: source,
})
}
None => {
geo_sorted = Some(sort_index);
canonical_sort.push(asc_desc);
}
},
}
}
canonical_sort
}
fn canonicalize_criterion(
criterion: Criterion,
criterion_index: usize,
terms_matching_strategy: TermsMatchingStrategy,
words: &mut Option<Option<usize>>,
canonicalization_actions: &mut Vec<CanonicalizationAction>,
canonical_criteria: &mut Vec<Criterion>,
rule: &mut Option<usize>,
) {
*words = match (terms_matching_strategy, words.take()) {
(TermsMatchingStrategy::All, words) => words,
(_, None) => {
// inject words
canonicalization_actions.push(CanonicalizationAction::PrependedWords {
prepended_index: RankingRuleSource::Criterion(criterion_index),
});
canonical_criteria.push(Criterion::Words);
Some(None)
}
(_, words) => words,
};
if let Some(previous_index) = *rule {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else {
*rule = Some(criterion_index);
canonical_criteria.push(criterion)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RankingRuleKind {
Relevancy,
AscendingSort,
DescendingSort,
AscendingGeoSort,
DescendingGeoSort,
}
#[derive(Debug, Clone, Copy)]
pub struct RankingRule {
source: RankingRuleSource,
kind: RankingRuleKind,
}
#[derive(Debug, Clone, Copy)]
pub enum RankingRuleSource {
Criterion(usize),
CoalescedCriteria(usize, usize),
Sort { criterion_index: usize, sort_index: usize },
}
impl RankingRuleSource {
fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
match self {
RankingRuleSource::Criterion(criterion_index) => criteria
.get(*criterion_index)
.map(|c| c.to_string())
.unwrap_or_else(|| "unknown".into()),
RankingRuleSource::CoalescedCriteria(begin, end) => {
let rules: Vec<_> = criteria
.get(*begin..=*end)
.iter()
.flat_map(|c| c.iter())
.map(|c| c.to_string())
.collect();
rules.join(", ")
}
RankingRuleSource::Sort { criterion_index: _, sort_index } => {
match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
Some(sort) => match sort {
AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
AscDesc::Desc(Member::Field(field_name)) => {
format!("{field_name}:desc")
}
AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
},
None => "unknown".into(),
}
}
}
}
fn rule_position(&self) -> String {
match self {
RankingRuleSource::Criterion(criterion_index) => {
format!("#{criterion_index} in ranking rules")
}
RankingRuleSource::CoalescedCriteria(begin, end) => {
format!("#{begin} to #{end} in ranking rules")
}
RankingRuleSource::Sort { criterion_index, sort_index } => format!(
"#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
),
}
}
}
impl RankingRule {
fn from_criterion<'a>(
criterion_index: usize,
criterion: &'a Criterion,
sort: &'a Option<Vec<AscDesc>>,
) -> impl Iterator<Item = Self> + 'a {
let kind = match criterion {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => RankingRuleKind::Relevancy,
Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,
Criterion::Asc(_) => RankingRuleKind::AscendingSort,
Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,
Criterion::Desc(_) => RankingRuleKind::DescendingSort,
Criterion::Sort => {
return either::Right(sort.iter().flatten().enumerate().map(
move |(rule_index, asc_desc)| {
Self::from_asc_desc(asc_desc, criterion_index, rule_index)
},
))
}
};
either::Left(std::iter::once(Self {
source: RankingRuleSource::Criterion(criterion_index),
kind,
}))
}
fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
let kind = match asc_desc {
AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
};
Self {
source: RankingRuleSource::Sort {
criterion_index: sort_index,
sort_index: rule_index_in_sort,
},
kind,
}
}
fn as_string(
&self,
canonical_criteria: &[Criterion],
canonical_sort: &Option<Vec<AscDesc>>,
query_index: usize,
index_uid: &str,
) -> String {
let kind = match self.kind {
RankingRuleKind::Relevancy => "relevancy",
RankingRuleKind::AscendingSort => "ascending sort",
RankingRuleKind::DescendingSort => "descending sort",
RankingRuleKind::AscendingGeoSort => "ascending geo sort",
RankingRuleKind::DescendingGeoSort => "descending geo sort",
};
let rules = self.fetch_from_source(canonical_criteria, canonical_sort);
let source = match self.source {
RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
};
format!("{source}: {kind} {rules}")
}
fn fetch_from_source(
&self,
canonical_criteria: &[Criterion],
canonical_sort: &Option<Vec<AscDesc>>,
) -> String {
let rule_name = match self.source {
RankingRuleSource::Criterion(index) => {
canonical_criteria.get(index).map(|criterion| criterion.to_string())
}
RankingRuleSource::CoalescedCriteria(begin, end) => {
let rules: Vec<String> = canonical_criteria
.get(begin..=end)
.into_iter()
.flat_map(|criteria| criteria.iter())
.map(|criterion| criterion.to_string())
.collect();
(!rules.is_empty()).then_some(rules.join(", "))
}
RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
.as_deref()
.and_then(|canonical_sort| canonical_sort.get(sort_index))
.and_then(|asc_desc: &AscDesc| match asc_desc {
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
Some(format!("on field `{s}`"))
}
_ => None,
}),
};
let rule_name = rule_name.unwrap_or_else(|| "default".into());
format!("rule(s) {rule_name}")
}
}
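A minimal usage sketch (not taken from the changeset) of the module above: each query's rules are canonicalized with `RankingRules::new`, then checked pairwise with `is_compatible_with` before federated merging. The helper name below is hypothetical, and the items defined above are assumed to be in scope.
use meilisearch_types::milli::{Criterion, TermsMatchingStrategy};

fn check_queries_are_mergeable() -> Result<(), CompatibilityError> {
    // Query #0: keyword search ranked by relevancy rules only.
    let previous = RankingRules::new(
        vec![Criterion::Words, Criterion::Typo, Criterion::Proximity],
        None,
        TermsMatchingStrategy::All,
        CanonicalizationKind::Keyword,
    );
    // Query #1: the same relevancy rules, then a descending sort on `release_date`.
    let current = RankingRules::new(
        vec![Criterion::Words, Criterion::Typo, Criterion::Desc("release_date".into())],
        None,
        TermsMatchingStrategy::All,
        CanonicalizationKind::Keyword,
    );
    // Relevancy rules coalesce into one constraint and the comparison stops at the
    // shorter list, so this pair is compatible and returns Ok(()).
    current.is_compatible_with(&previous)
}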

View File

@ -1,5 +1,5 @@
use actix_web::http::StatusCode;
use actix_web::test; use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header}; use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*; use meili_snap::*;
use uuid::Uuid; use uuid::Uuid;

View File

@ -310,23 +310,6 @@ macro_rules! compute_authorized_single_search {
tenant_token, tenant_token,
key_content key_content
); );
// federated
let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await;
assert_eq!(
200, code,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
assert_eq!(
// same count as the search is federated over a single query
$expected_count,
response["hits"].as_array().unwrap().len(),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
} }
} }
}; };
@ -392,25 +375,6 @@ macro_rules! compute_authorized_multiple_search {
tenant_token, tenant_token,
key_content key_content
); );
let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [
{"indexUid": "sales", "filter": $filter1},
{"indexUid": "products", "filter": $filter2},
]})).await;
assert_eq!(
code, 200,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
assert_eq!(
response["hits"].as_array().unwrap().len(),
// sum of counts as the search is federated across two queries in different indexes
$expected_count1 + $expected_count2,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
} }
} }
}; };
@ -469,24 +433,6 @@ macro_rules! compute_forbidden_single_search {
"{} using tenant_token: {:?} generated with parent_key: {:?}", "{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content response, tenant_token, key_content
); );
let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
assert_eq!(
code, 403,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
} }
} }
}; };
@ -548,27 +494,6 @@ macro_rules! compute_forbidden_multiple_search {
"{} using tenant_token: {:?} generated with parent_key: {:?}", "{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content response, tenant_token, key_content
); );
let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [
{"indexUid": "sales"},
{"indexUid": "products"},
]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
assert_eq!(
code, 403,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
} }
} }
}; };

View File

@ -26,15 +26,6 @@ impl Value {
panic!("Didn't find any task id in: {self}"); panic!("Didn't find any task id in: {self}");
} }
} }
// Panics if the json doesn't contain the `status` field set to "succeeded"
#[track_caller]
pub fn succeeded(&self) -> &Self {
if self["status"] != serde_json::Value::String(String::from("succeeded")) {
panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
}
} }
impl From<serde_json::Value> for Value { impl From<serde_json::Value> for Value {

View File

@ -6,7 +6,7 @@ use std::time::Duration;
use actix_http::body::MessageBody; use actix_http::body::MessageBody;
use actix_web::dev::ServiceResponse; use actix_web::dev::ServiceResponse;
use actix_web::http::StatusCode; use actix_web::http::StatusCode;
use byte_unit::{Byte, Unit}; use byte_unit::{Byte, ByteUnit};
use clap::Parser; use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt}; use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
env: "development".to_owned(), env: "development".to_owned(),
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
no_analytics: true, no_analytics: true,
max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(), max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(), max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(), http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
snapshot_dir: ".".into(), snapshot_dir: ".".into(),
indexer_options: IndexerOpts { indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context. // memory has to be unlimited because several meilisearch are running in test context.

View File

@ -2274,7 +2274,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###" @r###"
{ {
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10 MiB.", "message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"code": "payload_too_large", "code": "payload_too_large",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large" "link": "https://docs.meilisearch.com/errors#payload_too_large"

View File

@ -168,7 +168,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -569,7 +569,7 @@ async fn delete_document_by_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -776,7 +776,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -1,5 +1,5 @@
use actix_web::http::header::ACCEPT_ENCODING;
use actix_web::test; use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*; use meili_snap::*;
use urlencoding::encode as urlencode; use urlencoding::encode as urlencode;
@ -535,9 +535,7 @@ async fn get_document_with_vectors() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -646,12 +644,7 @@ async fn get_document_with_vectors() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@ -707,12 +700,7 @@ async fn get_document_with_vectors() {
}, },
{ {
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,

View File

@ -1859,9 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -1954,9 +1952,7 @@ async fn generate_and_import_dump_containing_vectors() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
let index = server.index("pets"); let index = server.index("pets");
@ -2026,9 +2022,7 @@ async fn generate_and_import_dump_containing_vectors() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);

View File

@ -20,9 +20,7 @@ async fn experimental_features() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -33,9 +31,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -46,9 +42,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -60,9 +54,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -74,9 +66,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
} }
@ -95,9 +85,7 @@ async fn experimental_feature_metrics() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": true, "metrics": true,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -152,7 +140,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`", "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -1,5 +1,6 @@
use actix_web::http::header::{ContentType, ACCEPT_ENCODING}; use actix_web::http::header::ContentType;
use actix_web::test; use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot}; use meili_snap::{json_string, snapshot};
use meilisearch::Opt; use meilisearch::Opt;

View File

@ -645,20 +645,19 @@ async fn filter_invalid_syntax_object() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await; index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone(); let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await; index.add_documents(documents, None).await;
index.wait_task(task.uid()).await; index.wait_task(1).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index index
.search(json!({"filter": "title & Glass"}), |response, code| { .search(json!({"filter": "title & Glass"}), |response, code| {
snapshot!(response, @r###" assert_eq!(response, expected_response);
{ assert_eq!(code, 400);
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
}) })
.await; .await;
} }
@ -671,20 +670,19 @@ async fn filter_invalid_syntax_array() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await; index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone(); let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await; index.add_documents(documents, None).await;
index.wait_task(task.uid()).await; index.wait_task(1).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index index
.search(json!({"filter": ["title & Glass"]}), |response, code| { .search(json!({"filter": ["title & Glass"]}), |response, code| {
snapshot!(response, @r###" assert_eq!(response, expected_response);
{ assert_eq!(code, 400);
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
}) })
.await; .await;
} }
@ -1205,68 +1203,3 @@ async fn distinct_at_search_time() {
} }
"###); "###);
} }
#[actix_rt::test]
async fn search_with_contains_without_enabling_the_feature() {
// Since a filter is deserialized as a json Value it will never fail to deserialize.
// Thus the error message is not generated by deserr but written by us.
let server = Server::new().await;
let index = server.index("doggo");
// Also, to trigger the error message we need to actually create the index, or else it'll throw an
// "index does not exist" error.
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
index
.search(json!({ "filter": "doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
index
.search(json!({ "filter": "doggo != echo AND doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
// For the POST search we can also use the array syntaxes
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", "doggo CONTAINS kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", ["doggo CONTAINS kefir"]] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@ -18,9 +18,7 @@ async fn index_with_documents_user_provided<'a>(
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -48,9 +46,7 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);

View File

@ -13,11 +13,9 @@ mod pagination;
mod restrict_searchable; mod restrict_searchable;
mod search_queue; mod search_queue;
use meilisearch::Opt;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use tempfile::TempDir;
use crate::common::{default_settings, Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| { static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
@ -134,79 +132,6 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"name": "Exclusive sale: green apple",
"id": "green-apple-boosted",
"BOOST": true
},
{
"name": "Pear",
"id": "pear",
},
{
"name": "Red apple gala",
"id": "red-apple-gala",
},
{
"name": "Exclusive sale: Red Tomato",
"id": "red-tomatoes-boosted",
"BOOST": true
},
{
"name": "Exclusive sale: Red delicious apple",
"id": "red-delicious-boosted",
"BOOST": true,
}
])
});
static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": "A",
"description": "the dog barks at the cat",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.9, 0.8, 0.05],
// dimensions [negative/positive, energy]
"sentiment": [-0.1, 0.55]
}
},
{
"id": "B",
"description": "the kitten scratched the beagle",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.9, 0.5],
// dimensions [negative/positive, energy]
"sentiment": [-0.2, 0.65]
}
},
{
"id": "C",
"description": "the dog had to stay alone today",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.85, 0.02, 0.1],
// dimensions [negative/positive, energy]
"sentiment": [-1.0, 0.1]
}
},
{
"id": "D",
"description": "the little boy pets the puppy",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.09, 0.8],
// dimensions [negative/positive, energy]
"sentiment": [0.8, 0.3]
}
},
])
});
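For context (an aside, not from the changeset): `_vectors` appears in two shapes in this diff. Documents are sent with a bare array per embedder, as in the fixture above, while the snapshots return the expanded `embeddings` + `regenerate` form; a `null` value opts a document out of a `userProvided` embedder, per the error hint quoted further down. A small sketch reusing the `manual` embedder name from the tests:
use serde_json::json;

fn main() {
    let docs = json!([
        // as sent in the fixtures: a bare array per embedder
        { "id": "A", "_vectors": { "manual": [0.9, 0.8, 0.05] } },
        // as returned in the snapshots: explicit `embeddings` plus the `regenerate` flag
        { "id": "B", "_vectors": { "manual": { "embeddings": [[0.8, 0.9, 0.5]], "regenerate": false } } },
        // opting this document out of the `manual` embedder
        { "id": "C", "_vectors": { "manual": null } }
    ]);
    println!("{}", serde_json::to_string_pretty(&docs).unwrap());
}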
#[actix_rt::test] #[actix_rt::test]
async fn simple_placeholder_search() { async fn simple_placeholder_search() {
let server = Server::new().await; let server = Server::new().await;
@ -578,32 +503,6 @@ async fn search_with_filter_array_notation() {
assert_eq!(response["hits"].as_array().unwrap().len(), 3); assert_eq!(response["hits"].as_array().unwrap().len(), 3);
} }
#[actix_rt::test]
async fn search_with_contains_filter() {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt {
experimental_contains_filter: true,
..default_settings(temp.path())
})
.await
.unwrap();
let index = server.index("movies");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (request, _code) = index.add_documents(documents, None).await;
index.wait_task(request.uid()).await.succeeded();
let (response, code) = index
.search_post(json!({
"filter": "title CONTAINS cap"
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}
#[actix_rt::test] #[actix_rt::test]
async fn search_with_sort_on_numbers() { async fn search_with_sort_on_numbers() {
let server = Server::new().await; let server = Server::new().await;

File diff suppressed because it is too large

View File

@ -98,9 +98,7 @@ async fn secrets_are_hidden_in_settings() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);

View File

@ -360,17 +360,16 @@ async fn filter_invalid_syntax_object() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
snapshot!(response, @r###" assert_eq!(response, expected_response);
{ assert_eq!(code, 400);
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
}) })
.await; .await;
} }
@ -399,17 +398,16 @@ async fn filter_invalid_syntax_array() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
snapshot!(response, @r###" assert_eq!(response, expected_response);
{ assert_eq!(code, 400);
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
}) })
.await; .await;
} }

View File

@ -55,9 +55,7 @@ async fn basic() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -246,9 +244,7 @@ async fn ranking_score_threshold() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -527,9 +523,7 @@ async fn filter() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -656,9 +650,7 @@ async fn limit_and_offset() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);

View File

@ -97,7 +97,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types", "code": "invalid_task_types",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types" "link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -108,7 +108,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types", "code": "invalid_task_types",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types" "link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -119,7 +119,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types", "code": "invalid_task_types",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types" "link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -16,9 +16,7 @@ async fn add_remove_user_provided() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -121,12 +119,7 @@ async fn add_remove_user_provided() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@ -148,12 +141,7 @@ async fn add_remove_user_provided() {
{ {
"id": 1, "id": 1,
"name": "echo", "name": "echo",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,
@ -171,9 +159,7 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -483,99 +469,6 @@ async fn user_provided_embeddings_error() {
"###); "###);
} }
#[actix_rt::test]
async fn user_provided_vectors_error() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
// First case, we forget to specify `_vectors`
let documents = json!({"id": 42, "name": "kefir"});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second case, we provide `_vectors` with a typo
let documents = json!({"id": 42, "name": "kefir", "_vector": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vector: manaul000\\n _vector.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Third case, we specify the embedder with a typo
let documents = json!({"id": 42, "name": "kefir", "_vectors": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: manaul000\\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vectors.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
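For context on the `_vectors` payloads exercised in these tests, the error hints spell out the expected shape: an embedder with `source: userProvided` (here named `manual`) wants an embedding array under `_vectors.manual`, and a document can opt out with `_vectors.manual: null`. A small sketch of both shapes (the ids, names, and dimensions are made up for illustration):

use serde_json::json;

fn main() {
    // Embedding supplied inline for the `manual` embedder (userProvided source).
    let with_embedding = json!({
        "id": 42,
        "name": "kefir",
        "_vectors": { "manual": [0.0, 0.0, 0.0] }
    });

    // Opt-out for this document, as suggested by the error hint:
    // `_vectors.manual: null` skips the embedding requirement for `manual`.
    let opted_out = json!({
        "id": 43,
        "name": "echo",
        "_vectors": { "manual": null }
    });

    println!("{with_embedding}\n{opted_out}");
}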
#[actix_rt::test] #[actix_rt::test]
async fn clear_documents() { async fn clear_documents() {
let server = Server::new().await; let server = Server::new().await;
@ -623,9 +516,7 @@ async fn add_remove_one_vector_4588() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);
@ -686,12 +577,7 @@ async fn add_remove_one_vector_4588() {
{ {
"id": 0, "id": 0,
"name": "kefir", "name": "kefir",
"_vectors": { "_vectors": {}
"manual": {
"embeddings": [],
"regenerate": false
}
}
} }
], ],
"offset": 0, "offset": 0,

View File

@ -14,9 +14,7 @@ async fn update_embedder() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false, "logsRoute": false
"editDocumentsByFunction": false,
"containsFilter": false
} }
"###); "###);

View File

@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.79"
clap = { version = "4.5.9", features = ["derive"] } clap = { version = "4.4.17", features = ["derive"] }
dump = { path = "../dump" } dump = { path = "../dump" }
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.36", features = ["formatting"] } time = { version = "0.3.31", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false } uuid = { version = "1.6.1", features = ["v4"], default-features = false }

View File

@ -1,6 +1,6 @@
[package] [package]
name = "milli" name = "milli"
edition = "2021" edition = "2018"
publish = false publish = false
version.workspace = true version.workspace = true
@ -14,84 +14,81 @@ license.workspace = true
[dependencies] [dependencies]
bimap = { version = "0.6.3", features = ["serde"] } bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3" bincode = "1.3.3"
bstr = "1.9.1" bstr = "1.9.0"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.8.12", default-features = false } charabia = { version = "0.8.11", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.11"
deserr = "0.6.2" deserr = "0.6.1"
either = { version = "1.13.0", features = ["serde"] } either = { version = "1.9.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" } flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [ grenad = { version = "0.4.6", default-features = false, features = [
"rayon", "rayon",
"tempfile", "tempfile",
] } ] }
heed = { version = "0.20.3", default-features = false, features = [ heed = { version = "0.20.1", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls", "read-txn-no-tls",
] } ] }
indexmap = { version = "2.2.6", features = ["serde"] } indexmap = { version = "2.1.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0" memmap2 = "0.7.1"
memmap2 = "0.9.4" obkv = "0.2.1"
obkv = "0.2.2"
once_cell = "1.19.0" once_cell = "1.19.0"
ordered-float = "4.2.1" ordered-float = "4.2.0"
rayon = "1.10.0" rand_pcg = { version = "0.3.1", features = ["serde1"] }
roaring = { version = "0.10.6", features = ["serde"] } rayon = "1.8.0"
rstar = { version = "0.12.0", features = ["serde"] } roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } rstar = { version = "0.11.0", features = ["serde"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
slice-group-by = "0.3.1" slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] } smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = { version = "1.13.2", features = ["union"] } smallvec = "1.12.0"
smartstring = "1.0.1" smartstring = "1.0.1"
tempfile = "3.10.1" tempfile = "3.9.0"
thiserror = "1.0.61" thiserror = "1.0.56"
time = { version = "0.3.36", features = [ time = { version = "0.3.31", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
uuid = { version = "1.10.0", features = ["v4"] } uuid = { version = "1.6.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" } filter-parser = { path = "../filter-parser" }
# documents words self-join # documents words self-join
itertools = "0.13.0" itertools = "0.11.0"
csv = "1.3.0" csv = "1.3.0"
candle-core = { version = "0.6.0" } candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.6.0" } candle-transformers = { version = "0.4.1" }
candle-nn = { version = "0.6.0" } candle-nn = { version = "0.4.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig", "onig",
] } ] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online", "online",
] } ] }
tiktoken-rs = "0.5.9" tiktoken-rs = "0.5.8"
liquid = "0.26.6" liquid = "0.26.4"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } arroy = { git = "https://github.com/meilisearch/arroy", branch = "binary-quantization" }
arroy = "0.4.0"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.40" tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] } ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.2" url = "2.5.0"
rayon-par-bridge = "0.1.0"
lru = "0.12.3"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.39", default-features = false }
big_s = "1.0.2" big_s = "1.0.2"
insta = "1.39.0" insta = "1.34.0"
maplit = "1.0.2" maplit = "1.0.2"
md5 = "0.7.0" md5 = "0.7.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }

View File

@ -95,7 +95,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read. /// `next_document` advance the document reader until all the documents have been read.
pub fn next_enriched_document( pub fn next_enriched_document(
&mut self, &mut self,
) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> { ) -> Result<Option<EnrichedDocument>, DocumentsBatchCursorError> {
let document = self.documents.next_document()?; let document = self.documents.next_document()?;
let document_id = match self.external_ids.move_on_next()? { let document_id = match self.external_ids.move_on_next()? {
Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?, Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,

View File

@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes(); const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object. /// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> { pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter() obkv.iter()
.map(|(field_id, value)| { .map(|(field_id, value)| {
let field_name = index let field_name = index
@ -64,7 +64,7 @@ impl DocumentsBatchIndex {
self.0.len() self.0.len()
} }
pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> { pub fn iter(&self) -> bimap::hash::Iter<FieldId, String> {
self.0.iter() self.0.iter()
} }
@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned() self.0.get_by_right(name).cloned()
} }
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> { pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new(); let mut map = Object::new();
for (k, v) in document.iter() { for (k, v) in document.iter() {

View File

@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id( pub fn document_id(
&self, &self,
document: &obkv::KvReader<'_, FieldId>, document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper, fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> { ) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self { match self {

View File

@ -76,7 +76,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read. /// `next_document` advance the document reader until all the documents have been read.
pub fn next_document( pub fn next_document(
&mut self, &mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> { ) -> Result<Option<KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? { match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => { Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value))) Ok(Some(KvReader::new(value)))
@ -108,7 +108,7 @@ impl From<serde_json::Error> for DocumentsBatchCursorError {
impl error::Error for DocumentsBatchCursorError {} impl error::Error for DocumentsBatchCursorError {}
impl fmt::Display for DocumentsBatchCursorError { impl fmt::Display for DocumentsBatchCursorError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
DocumentsBatchCursorError::Grenad(e) => e.fmt(f), DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f), DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),

View File

@ -56,7 +56,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
Ok(Ok(())) Ok(Ok(()))
} }
fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a documents, or a sequence of documents.") write!(f, "a documents, or a sequence of documents.")
} }
} }

View File

@ -5,7 +5,6 @@ use std::{io, str};
use heed::{Error as HeedError, MdbError}; use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError; use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
use serde_json::Value; use serde_json::Value;
use thiserror::Error; use thiserror::Error;
@ -260,25 +259,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidSettingsDimensions { embedder_name: String }, InvalidSettingsDimensions { embedder_name: String },
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
#[error("Document editions cannot modify a document's primary key")]
DocumentEditionCannotModifyPrimaryKey,
#[error("Document editions must keep documents as objects")]
DocumentEditionDocumentMustBeObject,
#[error("Document edition runtime error encountered while running the function: {0}")]
DocumentEditionRuntimeError(Box<EvalAltResult>),
#[error("Document edition runtime error encountered while compiling the function: {0}")]
DocumentEditionCompilationError(rhai::ParseError),
#[error("{0}")]
DocumentEmbeddingError(String),
} }
impl From<crate::vector::Error> for Error { impl From<crate::vector::Error> for Error {
fn from(value: crate::vector::Error) -> Self { fn from(value: crate::vector::Error) -> Self {
match value.fault() { match value.fault() {
FaultSource::User => Error::UserError(value.into()), FaultSource::User => Error::UserError(value.into()),
FaultSource::Runtime => Error::UserError(value.into()), FaultSource::Runtime => Error::InternalError(value.into()),
FaultSource::Bug => Error::InternalError(value.into()), FaultSource::Bug => Error::InternalError(value.into()),
FaultSource::Undecided => Error::UserError(value.into()), FaultSource::Undecided => Error::InternalError(value.into()),
} }
} }
} }
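This hunk changes how embedding errors are surfaced depending on their fault source: on one side of the diff, runtime and undecided faults are reported as user errors rather than internal ones. A minimal sketch of that classification pattern, with all names invented for illustration:

// Illustrative only: a fault source records whether the caller or the engine
// is to blame, and the conversion picks the public error kind accordingly.
enum FaultSource { User, Runtime, Bug, Undecided }

enum AppError { UserError(String), InternalError(String) }

fn classify(fault: FaultSource, msg: String) -> AppError {
    match fault {
        // Problems traceable to user input or user-provided configuration
        // are reported back to the caller.
        FaultSource::User | FaultSource::Runtime | FaultSource::Undecided => {
            AppError::UserError(msg)
        }
        // Genuine bugs stay internal.
        FaultSource::Bug => AppError::InternalError(msg),
    }
}

fn main() {
    match classify(FaultSource::Runtime, "embedding failed".into()) {
        AppError::UserError(msg) => println!("user error: {msg}"),
        AppError::InternalError(msg) => println!("internal error: {msg}"),
    }
}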

View File

@ -24,21 +24,17 @@ impl ExternalDocumentsIds {
} }
/// Returns `true` if hard and soft external documents lists are empty. /// Returns `true` if hard and soft external documents lists are empty.
pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> { pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
self.0.is_empty(rtxn).map_err(Into::into) self.0.is_empty(rtxn).map_err(Into::into)
} }
pub fn get<A: AsRef<str>>( pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
&self,
rtxn: &RoTxn<'_>,
external_id: A,
) -> heed::Result<Option<u32>> {
self.0.get(rtxn, external_id.as_ref()) self.0.get(rtxn, external_id.as_ref())
} }
/// An helper function to debug this type, returns an `HashMap` of both, /// An helper function to debug this type, returns an `HashMap` of both,
/// soft and hard fst maps, combined. /// soft and hard fst maps, combined.
pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> { pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
let mut map = HashMap::default(); let mut map = HashMap::default();
for result in self.0.iter(rtxn)? { for result in self.0.iter(rtxn)? {
let (external, internal) = result?; let (external, internal) = result?;
@ -55,11 +51,7 @@ impl ExternalDocumentsIds {
/// ///
/// - If attempting to delete a document that doesn't exist /// - If attempting to delete a document that doesn't exist
/// - If attempting to create a document that already exists /// - If attempting to create a document that already exists
pub fn apply( pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
&self,
wtxn: &mut RwTxn<'_>,
operations: Vec<DocumentOperation>,
) -> heed::Result<()> {
for DocumentOperation { external_id, internal_id, kind } in operations { for DocumentOperation { external_id, internal_id, kind } in operations {
match kind { match kind {
DocumentOperationKind::Create => { DocumentOperationKind::Create => {
@ -77,7 +69,7 @@ impl ExternalDocumentsIds {
} }
/// Returns an iterator over all the external ids. /// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> { pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
self.0.iter(rtxn) self.0.iter(rtxn)
} }
} }
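As the doc comments in this hunk state, the type maps external (string) document ids to internal u32 ids, and `apply` treats deleting a missing id or re-creating an existing one as a caller bug. A rough in-memory analogue of that contract, ignoring the LMDB transactions entirely:

use std::collections::HashMap;

enum DocumentOperationKind { Create, Delete }

struct DocumentOperation {
    external_id: String,
    internal_id: u32,
    kind: DocumentOperationKind,
}

#[derive(Default)]
struct ExternalIds(HashMap<String, u32>);

impl ExternalIds {
    fn apply(&mut self, operations: Vec<DocumentOperation>) {
        for DocumentOperation { external_id, internal_id, kind } in operations {
            match kind {
                DocumentOperationKind::Create => {
                    // Creating an id that already exists is a caller bug.
                    assert!(self.0.insert(external_id, internal_id).is_none());
                }
                DocumentOperationKind::Delete => {
                    // Deleting an id that does not exist is a caller bug.
                    assert!(self.0.remove(&external_id).is_some());
                }
            }
        }
    }
}

fn main() {
    let mut ids = ExternalIds::default();
    ids.apply(vec![DocumentOperation {
        external_id: "kefir".into(),
        internal_id: 0,
        kind: DocumentOperationKind::Create,
    }]);
    assert_eq!(ids.0.get("kefir"), Some(&0));
}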

View File

@ -11,7 +11,7 @@ pub enum FacetType {
} }
impl fmt::Display for FacetType { impl fmt::Display for FacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
FacetType::String => f.write_str("string"), FacetType::String => f.write_str("string"),
FacetType::Number => f.write_str("number"), FacetType::Number => f.write_str("number"),
@ -37,7 +37,7 @@ impl FromStr for FacetType {
pub struct InvalidFacetType; pub struct InvalidFacetType;
impl fmt::Display for InvalidFacetType { impl fmt::Display for InvalidFacetType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(r#"Invalid facet type, must be "string" or "number""#) f.write_str(r#"Invalid facet type, must be "string" or "number""#)
} }
} }
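Most of the remaining hunks in this comparison only add the anonymous lifetime `<'_>` to types such as `fmt::Formatter`, `RoTxn`, `KvReader`, and `Cow`; both spellings compile to the same code, and the explicit form simply satisfies lints such as `elided_lifetimes_in_paths`. A standalone sketch of the `Display` case:

use std::fmt;

struct Facet;

impl fmt::Display for Facet {
    // Equivalent to taking `f: &mut fmt::Formatter`; `<'_>` just spells out
    // the elided lifetime instead of hiding it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string")
    }
}

fn main() {
    println!("{}", Facet);
}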

View File

@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec { impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str); type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2); let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes()); bytes.extend_from_slice(s.as_bytes());

View File

@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec { impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str); type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4); let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes()); bytes.extend_from_slice(s.as_bytes());

View File

@ -35,7 +35,7 @@ where
fn bytes_encode( fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem, (field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<'a, [u8]>, BoxedError> { ) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32); let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes

View File

@ -24,7 +24,7 @@ impl<'a> BytesDecode<'a> for OrderedF64Codec {
impl heed::BytesEncode<'_> for OrderedF64Codec { impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64; type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> { fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16]; let mut buffer = [0u8; 16];
// write the globally ordered float // write the globally ordered float
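The comment above mentions writing the float in a "globally ordered" form, i.e. a byte encoding whose lexicographic order matches the numeric order of the floats. A common transform that achieves this (a sketch of the general technique, not necessarily the exact encoding OrderedF64Codec uses) sets the sign bit for non-negative values and inverts all bits for negative ones:

// Map an f64 to 8 bytes whose byte-wise order matches numeric order:
// non-negative values get the sign bit set, negative values are fully inverted.
fn orderable_bytes(f: f64) -> [u8; 8] {
    let bits = f.to_bits();
    let ordered = if bits & (1u64 << 63) == 0 {
        bits ^ (1u64 << 63) // non-negative: set the sign bit
    } else {
        !bits // negative: invert everything, reversing their order
    };
    ordered.to_be_bytes()
}

fn main() {
    let values = [-10.0_f64, -0.25, 0.0, 3.5, 42.0]; // already in numeric order
    let keys: Vec<[u8; 8]> = values.iter().copied().map(orderable_bytes).collect();
    // Byte-wise comparison of the keys agrees with numeric order of the floats.
    assert!(keys.windows(2).all(|w| w[0] <= w[1]));
}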

View File

@ -21,7 +21,7 @@ impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec { impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8); type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1); let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes()); bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count); bytes.push(*word_count);

View File

@ -16,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for ObkvCodec {
impl heed::BytesEncode<'_> for ObkvCodec { impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>; type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).map_err(Into::into) item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
} }
} }

Some files were not shown because too many files have changed in this diff.