Compare commits

..

3 Commits

Author  SHA1        Message                                                                      Date
Tamo    4696b8199f  2 additional precent won by reserving the memory before pushing in the vec  2023-10-24 16:03:10 +02:00
Tamo    01b1effec0  get rid of the slow read_u32                                                 2023-10-24 15:48:56 +02:00
Tamo    51fb4d6976  merge bitmaps with serialized bitmaps                                        2023-10-24 14:55:20 +02:00
369 changed files with 10740 additions and 18368 deletions


@@ -1,2 +0,0 @@
[alias]
xtask = "run --package xtask --"


@@ -7,17 +7,19 @@ assignees: ''
--- ---
Related product team resources: [PRD]() (_internal only_) Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product discussion: Related product discussion:
Related spec: WIP Related spec: WIP
## Motivation ## Motivation
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.--> <!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage ## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features--> <!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
## TODO ## TODO
@@ -27,23 +29,6 @@ Related spec: WIP
- [ ] If prototype validated, merge changes into `main` - [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec - [ ] Update the spec
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
- [ ] Ensure Analytics are fully implemented
- [ ] `/settings/my-new-setting` configurated in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
- [ ] global `/settings` route configurated in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
#### Special cases when adding a setting for an experimental feature
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
## Impacted teams ## Impacted teams
<!---Ping the related teams. Ask for the engine manager if any hesitation--> <!---Ping the related teams. Ask for the engine manager if any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->
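To make the "API stability" reminder above concrete, here is a minimal, self-contained sketch of the idea; every type, field, and flag name below is invented for illustration and is not Meilisearch's real code. The point is only that a setting tied to an experimental feature is reported as `NotSet` while the feature has never been enabled, so it stays out of the stable `/settings` response.

```rust
// Hypothetical sketch: hide an experimental setting while its feature flag is off.
#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    NotSet,
}

struct RuntimeFeatures {
    my_new_feature_enabled: bool, // invented flag name
}

struct Settings {
    my_new_setting: Setting<String>, // invented setting name
}

fn settings_for_route(mut stored: Settings, features: &RuntimeFeatures) -> Settings {
    if !features.my_new_feature_enabled {
        // Report the setting as `NotSet` instead of exposing its stored value.
        stored.my_new_setting = Setting::NotSet;
    }
    stored
}

fn main() {
    let stored = Settings { my_new_setting: Setting::Set("some value".into()) };
    let features = RuntimeFeatures { my_new_feature_enabled: false };
    let returned = settings_for_route(stored, &features);
    assert_eq!(returned.my_new_setting, Setting::NotSet);
    println!("experimental setting hidden while the feature is disabled");
}
```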


@@ -50,7 +50,7 @@ jobs:
needs: check-version needs: check-version
steps: steps:
- name: Create PR to Homebrew - name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v3 uses: mislav/bump-homebrew-formula-action@v2
with: with:
formula-name: meilisearch formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb formula-path: Formula/m/meilisearch.rb


@@ -63,7 +63,7 @@ jobs:
uses: docker/setup-buildx-action@v3 uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub - name: Login to Docker Hub
uses: docker/login-action@v3 uses: docker/login-action@v2
with: with:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
- name: Send CI information to Cloud team - name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event) # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push' if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v3 uses: peter-evans/repository-dispatch@v2
with: with:
token: ${{ secrets.MEILI_BOT_GH_PAT }} token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud repository: meilisearch/meilisearch-cloud


@@ -22,7 +22,7 @@ jobs:
outputs: outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }} docker-image: ${{ steps.define-image.outputs.docker-image }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- name: Define the Docker image we need to use - name: Define the Docker image we need to use
id: define-image id: define-image
run: | run: |
@@ -46,11 +46,11 @@ jobs:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }} MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-dotnet repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core - name: Setup .NET Core
uses: actions/setup-dotnet@v4 uses: actions/setup-dotnet@v3
with: with:
dotnet-version: "6.0.x" dotnet-version: "6.0.x"
- name: Install dependencies - name: Install dependencies
@@ -75,12 +75,12 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-dart repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1 - uses: dart-lang/setup-dart@v1
with: with:
sdk: 'latest' sdk: 3.1.1
- name: Install dependencies - name: Install dependencies
run: dart pub get run: dart pub get
- name: Run integration tests - name: Run integration tests
@@ -100,10 +100,10 @@ jobs:
- '7700:7700' - '7700:7700'
steps: steps:
- name: Set up Go - name: Set up Go
uses: actions/setup-go@v5 uses: actions/setup-go@v4
with: with:
go-version: stable go-version: stable
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-go repository: meilisearch/meilisearch-go
- name: Get dependencies - name: Get dependencies
@@ -129,11 +129,11 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-java repository: meilisearch/meilisearch-java
- name: Set up Java - name: Set up Java
uses: actions/setup-java@v4 uses: actions/setup-java@v3
with: with:
java-version: 8 java-version: 8
distribution: 'zulu' distribution: 'zulu'
@@ -156,11 +156,11 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-js repository: meilisearch/meilisearch-js
- name: Setup node - name: Setup node
uses: actions/setup-node@v4 uses: actions/setup-node@v3
with: with:
cache: 'yarn' cache: 'yarn'
- name: Install dependencies - name: Install dependencies
@@ -191,7 +191,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-php repository: meilisearch/meilisearch-php
- name: Install PHP - name: Install PHP
@@ -220,11 +220,11 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-python repository: meilisearch/meilisearch-python
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v5 uses: actions/setup-python@v4
- name: Install pipenv - name: Install pipenv
uses: dschep/install-pipenv-action@v1 uses: dschep/install-pipenv-action@v1
- name: Install dependencies - name: Install dependencies
@@ -245,7 +245,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-ruby repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3 - name: Set up Ruby 3
@@ -270,7 +270,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-rust repository: meilisearch/meilisearch-rust
- name: Build - name: Build
@@ -291,7 +291,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-swift repository: meilisearch/meilisearch-swift
- name: Run tests - name: Run tests
@@ -314,11 +314,11 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-js-plugins repository: meilisearch/meilisearch-js-plugins
- name: Setup node - name: Setup node
uses: actions/setup-node@v4 uses: actions/setup-node@v3
with: with:
cache: yarn cache: yarn
- name: Install dependencies - name: Install dependencies
@@ -345,7 +345,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-rails repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3 - name: Set up Ruby 3
@@ -369,7 +369,7 @@ jobs:
ports: ports:
- '7700:7700' - '7700:7700'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v3
with: with:
repository: meilisearch/meilisearch-symfony repository: meilisearch/meilisearch-symfony
- name: Install PHP - name: Install PHP


@@ -43,7 +43,7 @@ jobs:
toolchain: nightly toolchain: nightly
override: true override: true
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1 uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features - name: Run cargo check without any default features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -65,11 +65,7 @@ jobs:
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1 uses: Swatinem/rust-cache@v2.6.2
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo check without any default features - name: Run cargo check without any default features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -82,7 +78,7 @@ jobs:
args: --locked --release --all args: --locked --release --all
test-all-features: test-all-features:
name: Tests almost all features name: Tests all features
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@@ -98,12 +94,16 @@ jobs:
with: with:
toolchain: stable toolchain: stable
override: true override: true
- name: Run cargo build with almost all features - name: Run cargo build with all features
run: | uses: actions-rs/cargo@v1
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" with:
- name: Run cargo test with almost all features command: build
run: | args: --workspace --locked --release --all-features
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" - name: Run cargo test with all features
uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization: test-disabled-tokenization:
name: Test disabled tokenization name: Test disabled tokenization
@@ -149,7 +149,7 @@ jobs:
toolchain: stable toolchain: stable
override: true override: true
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1 uses: Swatinem/rust-cache@v2.6.2
- name: Run tests in debug - name: Run tests in debug
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -164,11 +164,11 @@ jobs:
- uses: actions-rs/toolchain@v1 - uses: actions-rs/toolchain@v1
with: with:
profile: minimal profile: minimal
toolchain: 1.75.0 toolchain: 1.71.1
override: true override: true
components: clippy components: clippy
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1 uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo clippy - name: Run cargo clippy
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -187,7 +187,7 @@ jobs:
override: true override: true
components: rustfmt components: rustfmt
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1 uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo fmt - name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate


@@ -8,11 +8,11 @@ env:
jobs: jobs:
run-benchmarks-on-comment: run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks name: Run and upload benchmarks
runs-on: benchmarks runs-on: benchmarks
timeout-minutes: 4320 # 72h timeout-minutes: 4320 # 72h
steps: steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1 - uses: actions-rs/toolchain@v1
with: with:
profile: minimal profile: minimal
@@ -25,27 +25,15 @@ jobs:
with: with:
command: benchmark command: benchmark
reaction-type: "eyes" reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables # Set variables
- name: Set current branch name - name: Set current branch name
shell: bash shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3 - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch id: normalized_current_branch
- name: Set shorter commit SHA - name: Set shorter commit SHA
shell: bash shell: bash
@@ -84,15 +72,10 @@ jobs:
# Compute the diff of the benchmarks and send a message on the GitHub PR # Compute the diff of the benchmarks and send a message on the GitHub PR
- name: Compute and send a message in the PR - name: Compute and send a message in the PR
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: | run: |
set -x export base=git rev-parse $(git cherry main | head -n 1 | cut -c 3-)~ | cut -c -8
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8) echo 'Here are your benchmarks diff 👊' >> body.txt
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo '```' >> body.txt echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt ./benchmaks/scipts/compare.sh $base ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt echo '```' >> body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt gh pr comment ${GITHUB_REF#refs/heads/} --body-file body.txt


@@ -75,12 +75,6 @@ If you get a "Too many open files" error you might want to increase the open fil
ulimit -Sn 3000 ulimit -Sn 3000
``` ```
#### Build tools
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
Run `cargo xtask --help` from the root of the repository to find out what is available.
## Git Guidelines ## Git Guidelines
### Git Branches ### Git Branches
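As background for the removed "Build tools" note above: cargo xtask is not a cargo plugin but a plain workspace binary, wired up through the `.cargo/config` alias shown at the top of this diff (`xtask = "run --package xtask --"`). Below is a hedged, minimal sketch of such a binary; the `list-features` arm is modeled on the `cargo xtask list-features` call seen in the workflow earlier, but its output here is a placeholder, not Meilisearch's actual implementation.

```rust
// Minimal xtask-style binary (src/main.rs of an `xtask` workspace crate).
// `cargo xtask <task>` resolves to `cargo run --package xtask -- <task>` via the alias.
use std::env;
use std::process::exit;

fn main() {
    match env::args().nth(1).as_deref() {
        // Placeholder output; the real task would inspect the workspace's features.
        Some("list-features") => println!("feature-a feature-b"),
        Some(other) => {
            eprintln!("unknown xtask: {other}");
            exit(1);
        }
        None => eprintln!("usage: cargo xtask <task>"),
    }
}
```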

Cargo.lock (generated, 2253 changes): file diff suppressed because it is too large.


@@ -2,7 +2,6 @@
resolver = "2" resolver = "2"
members = [ members = [
"meilisearch", "meilisearch",
"meilitool",
"meilisearch-types", "meilisearch-types",
"meilisearch-auth", "meilisearch-auth",
"meili-snap", "meili-snap",
@@ -16,16 +15,11 @@ members = [
"json-depth-checker", "json-depth-checker",
"benchmarks", "benchmarks",
"fuzzers", "fuzzers",
"tracing-trace",
"xtask",
] ]
[workspace.package] [workspace.package]
version = "1.7.0" version = "1.4.0"
authors = [ authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
description = "Meilisearch HTTP server" description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com" homepage = "https://meilisearch.com"
readme = "README.md" readme = "README.md"


@@ -1,9 +1,9 @@
# Compile # Compile
FROM rust:1.75.0-alpine3.18 AS compiler FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR / WORKDIR /meilisearch
ARG COMMIT_SHA ARG COMMIT_SHA
ARG COMMIT_DATE ARG COMMIT_DATE
@@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \ if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \ export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \ fi && \
cargo build --release -p meilisearch -p meilitool cargo build --release
# Run # Run
FROM alpine:3.16 FROM alpine:3.16
@@ -28,10 +28,9 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \ RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl && apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# and it's easy to find. # to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# To stay compatible with the older version of the container (pre v0.27.0) we're # To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch` # going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch RUN ln -s /bin/meilisearch /meilisearch


@@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) 2019-2024 Meili SAS Copyright (c) 2019-2022 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal


@@ -1,14 +1,14 @@
# Profiling Meilisearch # Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui). Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
![An example profiling with Puffin viewer](assets/profiling-example.png) ![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process ## Profiling the Indexing Process
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method. When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing. Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help. Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
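As a rough illustration of the Puffin mechanics described above (a hedged sketch, not Meilisearch's actual instrumentation): scopes recorded with `puffin::profile_function!` are grouped into frames, and the text above says the engine starts a new frame on every `IndexScheduler::tick`. The snippet assumes the `puffin` crate as a dependency.

```rust
// Sketch only: record a few profiling frames the way the README above describes.
fn indexing_step() {
    puffin::profile_function!(); // shows up as a scope in Puffin Viewer
    // ... work to be measured ...
}

fn main() {
    puffin::set_scopes_on(true); // scope collection is disabled by default
    for _tick in 0..3 {
        indexing_step();
        // One frame per scheduler tick, mirroring `IndexScheduler::tick`.
        puffin::GlobalProfiler::lock().new_frame();
    }
}
```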


@@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features ## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds - **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
@@ -61,6 +61,8 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience ## ⚡ Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required. Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
@@ -99,7 +101,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions) - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)! - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact) - Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
Thank you for your support! Thank you for your support!


@@ -106,7 +106,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "meilisearch_index_count{job=\"$job\", instance=\"$instance\"}", "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"range": true, "range": true,
@@ -165,7 +165,7 @@
"type": "prometheus" "type": "prometheus"
}, },
"editorMode": "builder", "editorMode": "builder",
"expr": "meilisearch_index_docs_count{job=\"$job\", index=\"$Index\", instance=\"$instance\"}", "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}",
"hide": false, "hide": false,
"range": true, "range": true,
"refId": "A" "refId": "A"
@@ -228,7 +228,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[1h]))", "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"range": true, "range": true,
@@ -288,7 +288,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[24h]))", "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"range": true, "range": true,
@@ -348,7 +348,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[30d]))", "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"range": true, "range": true,
@@ -447,7 +447,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "meilisearch_db_size_bytes{job=\"$job\", instance=\"$instance\"}", "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "", "interval": "",
"legendFormat": "Database size on disk", "legendFormat": "Database size on disk",
"range": true, "range": true,
@@ -458,7 +458,7 @@
"type": "prometheus" "type": "prometheus"
}, },
"editorMode": "builder", "editorMode": "builder",
"expr": "meilisearch_used_db_size_bytes{job=\"$job\", instance=\"$instance\"}", "expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"hide": false, "hide": false,
"legendFormat": "Used bytes", "legendFormat": "Used bytes",
"range": true, "range": true,
@@ -553,7 +553,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"$job\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])", "expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"interval": "", "interval": "",
"legendFormat": "{{method}} {{path}}", "legendFormat": "{{method}} {{path}}",
"range": true, "range": true,
@@ -646,7 +646,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"$job\"}[5m])", "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])",
"interval": "", "interval": "",
"legendFormat": "{{method}} {{path}}", "legendFormat": "{{method}} {{path}}",
"range": true, "range": true,
@@ -744,7 +744,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"$job\"}[30s]))", "expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
"format": "heatmap", "format": "heatmap",
"interval": "", "interval": "",
"legendFormat": "{{le}}", "legendFormat": "{{le}}",
@@ -854,7 +854,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"statuses\"}", "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}",
"interval": "", "interval": "",
"legendFormat": "{{value}} ", "legendFormat": "{{value}} ",
"range": true, "range": true,
@@ -947,7 +947,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"types\"}", "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}",
"interval": "", "interval": "",
"legendFormat": "{{value}} ", "legendFormat": "{{value}} ",
"range": true, "range": true,
@@ -1040,7 +1040,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"indexes\"}", "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}",
"interval": "", "interval": "",
"legendFormat": "{{value}} ", "legendFormat": "{{value}} ",
"range": true, "range": true,
@@ -1161,7 +1161,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "rate(process_cpu_seconds_total{job=\"$job\", instance=\"$instance\"}[1m])", "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])",
"interval": "", "interval": "",
"legendFormat": "process", "legendFormat": "process",
"range": true, "range": true,
@@ -1264,7 +1264,7 @@
}, },
"editorMode": "builder", "editorMode": "builder",
"exemplar": true, "exemplar": true,
"expr": "process_resident_memory_bytes{job=\"$job\", instance=\"$instance\"} / 1024 / 1024", "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024",
"interval": "", "interval": "",
"legendFormat": "process", "legendFormat": "process",
"range": true, "range": true,
@@ -1342,33 +1342,6 @@
"skipUrlSync": false, "skipUrlSync": false,
"sort": 0, "sort": 0,
"type": "query" "type": "query"
},
{
"current": {
"selected": true,
"text": "meilisearch",
"value": "meilisearch"
},
"datasource": {
"type": "prometheus"
},
"definition": "label_values(job)",
"description": "Prometheus job_name from scrape config (default is meilisearch)",
"hide": 0,
"includeAll": false,
"label": "Job",
"multi": false,
"name": "job",
"options": [],
"query": {
"query": "label_values(job)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
} }
] ]
}, },


@@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.79" anyhow = "1.0.70"
csv = "1.3.0" csv = "1.2.1"
milli = { path = "../milli" } milli = { path = "../milli" }
mimalloc = { version = "0.1.39", default-features = false } mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] } criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5" rand = "0.8.5"
rand_chacha = "0.3.1" rand_chacha = "0.3.1"
roaring = "0.10.2" roaring = { path = "../../roaring-rs" }
[build-dependencies] [build-dependencies]
anyhow = "1.0.79" anyhow = "1.0.70"
bytes = "1.5.0" bytes = "1.4.0"
convert_case = "0.6.0" convert_case = "0.6.0"
flate2 = "1.0.28" flate2 = "1.0.25"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features] [features]
default = ["milli/all-tokenizations"] default = ["milli/all-tokenizations"]


@@ -6,7 +6,9 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion}; use criterion::{criterion_group, criterion_main, Criterion};
use milli::heed::{EnvOpenOptions, RwTxn}; use milli::heed::{EnvOpenOptions, RwTxn};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use milli::update::{
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
};
use milli::Index; use milli::Index;
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng; use rand_chacha::rand_core::SeedableRng;
@@ -36,7 +38,7 @@ fn setup_index() -> Index {
} }
fn setup_settings<'t>( fn setup_settings<'t>(
wtxn: &mut RwTxn<'t>, wtxn: &mut RwTxn<'t, '_>,
index: &'t Index, index: &'t Index,
primary_key: &str, primary_key: &str,
searchable_fields: &[&str], searchable_fields: &[&str],
@@ -264,7 +266,17 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete) (index, document_ids_to_delete)
}, },
move |(index, document_ids_to_delete)| { move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete) let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}, },
) )
}); });
@@ -601,7 +613,17 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete) (index, document_ids_to_delete)
}, },
move |(index, document_ids_to_delete)| { move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete) let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}, },
) )
}); });
@@ -853,31 +875,22 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete) (index, document_ids_to_delete)
}, },
move |(index, document_ids_to_delete)| { move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete) let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}, },
) )
}); });
} }
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
let mut wtxn = index.write_txn().unwrap();
let indexer_config = IndexerConfig::default();
for ids in document_ids_to_delete {
let config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
.unwrap();
(builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}
fn indexing_movies_in_three_batches(c: &mut Criterion) { fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing"); let mut group = c.benchmark_group("indexing");
group.sample_size(BENCHMARK_ITERATION); group.sample_size(BENCHMARK_ITERATION);
@@ -1099,7 +1112,17 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete) (index, document_ids_to_delete)
}, },
move |(index, document_ids_to_delete)| { move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete) let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}, },
) )
}); });
@@ -1315,7 +1338,17 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete) (index, document_ids_to_delete)
}, },
move |(index, document_ids_to_delete)| { move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete) let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}, },
) )
}); });


@@ -129,6 +129,3 @@ experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652> # Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false experimental_reduce_indexing_memory_usage = false
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
# experimental_max_number_of_batched_tasks = 100


@@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.79" anyhow = "1.0.70"
flate2 = "1.0.28" flate2 = "1.0.25"
http = "0.2.11" http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0" once_cell = "1.17.1"
regex = "1.10.2" regex = "1.7.3"
roaring = { version = "0.10.2", features = ["serde"] } roaring = { path = "../../roaring-rs", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.40" tar = "0.4.38"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40" uuid = { version = "1.3.1", features = ["serde", "v4"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"


@@ -267,7 +267,6 @@ pub(crate) mod test {
dictionary: Setting::NotSet, dictionary: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet, typo_tolerance: Setting::NotSet,
faceting: Setting::Set(FacetingSettings { faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111), max_values_per_facet: Setting::Set(111),
@@ -276,7 +275,6 @@ pub(crate) mod test {
), ),
}), }),
pagination: Setting::NotSet, pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: std::marker::PhantomData, _kind: std::marker::PhantomData,
}; };
settings.check() settings.check()


@@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string) criterion.as_ref().map(ToString::to_string)
} }
Err(()) => { Err(()) => {
tracing::warn!( log::warn!(
"Could not import the following ranking rule: `{}`.", "Could not import the following ranking rule: `{}`.",
ranking_rule ranking_rule
); );
@@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2; use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source { Some(match source {
UpdateStatusV1::Enqueued { content } => { UpdateStatusV1::Enqueued { content } => {
tracing::warn!( log::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id content.update_id
); );
tracing::warn!("Task will be skipped in the queue of imported tasks."); log::warn!("Task will be skipped in the queue of imported tasks.");
return None; return None;
} }
@@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source { Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => { v1::update::UpdateType::Customs => {
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported"); log::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None; return None;
} }
v1::update::UpdateType::DocumentsAddition { .. } => { v1::update::UpdateType::DocumentsAddition { .. } => {
@@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => { v1::settings::RankingRule::WordsPosition => {
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None None
} }
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),


@@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at, started_processing_at: processing.started_processing_at,
}), }),
Err(e) => { Err(e) => {
tracing::warn!("Error with task {}: {}", processing.from.update_id, e); log::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`."); log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed { v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing { from: v3::updates::Processing {
from: v3::updates::Enqueued { from: v3::updates::Enqueued {
@@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at, enqueued_at: enqueued.enqueued_at,
}), }),
Err(e) => { Err(e) => {
tracing::warn!("Error with task {}: {}", enqueued.update_id, e); log::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`."); log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed { v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing { from: v3::updates::Processing {
from: v3::updates::Enqueued { from: v3::updates::Enqueued {
@@ -353,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload, "malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload, "missing_payload" => v3::Code::MissingPayload,
other => { other => {
tracing::warn!("Unknown error code {}", other); log::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode v3::Code::UnretrievableErrorCode
} }
} }


@@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid { let index_uid = match index_uid {
Some(uid) => uid, Some(uid) => uid,
None => { None => {
tracing::warn!( log::warn!(
"Error while importing the update {}.", "Error while importing the update {}.",
task.update.id() task.update.id()
); );
tracing::warn!( log::warn!(
"The index associated to the uuid `{}` could not be retrieved.", "The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string() task.uuid.to_string()
); );
if task.update.is_finished() { if task.update.is_finished() {
// we're fucking with his history but not his data, that's ok-ish. // we're fucking with his history but not his data, that's ok-ish.
tracing::warn!("The index-uuid will be set as `unknown`."); log::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown") String::from("unknown")
} else { } else {
tracing::warn!("The task will be ignored."); log::warn!("The task will be ignored.");
return None; return None;
} }
} }


@@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => { other => {
tracing::warn!("Unknown error code {}", other); log::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode v5::Code::UnretrievableErrorCode
} }
}; };


@@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest, "immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => { other => {
tracing::warn!("Unknown error code {}", other); log::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode v6::Code::UnretrievableErrorCode
} }
}; };
@@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule); new_ranking_rules.push(new_rule);
} }
Err(_) => { Err(_) => {
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
} }
} }
} }
@@ -345,7 +345,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
dictionary: v6::Setting::NotSet, dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(), synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(), distinct_attribute: settings.distinct_attribute.into(),
proximity_precision: v6::Setting::NotSet,
typo_tolerance: match settings.typo_tolerance { typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance { v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(), enabled: typo.enabled.into(),
@@ -378,7 +377,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v5::Setting::Reset => v6::Setting::Reset, v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet, v5::Setting::NotSet => v6::Setting::NotSet,
}, },
embedders: v6::Setting::NotSet,
_kind: std::marker::PhantomData, _kind: std::marker::PhantomData,
} }
} }


@@ -13,12 +13,12 @@ use crate::{Result, Version};
mod compat; mod compat;
mod v1; pub(self) mod v1;
mod v2; pub(self) mod v2;
mod v3; pub(self) mod v3;
mod v4; pub(self) mod v4;
mod v5; pub(self) mod v5;
mod v6; pub(self) mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>; pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>; pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
@@ -526,12 +526,12 @@ pub(crate) mod test {
assert!(indexes.is_empty()); assert!(indexes.is_empty());
// products // products
insta::assert_json_snapshot!(products.metadata(), @r###" insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "products", "uid": "products",
"primaryKey": "sku", "primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:27:23.951017769Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -541,12 +541,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies // movies
insta::assert_json_snapshot!(movies.metadata(), @r###" insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "movies", "uid": "movies",
"primaryKey": "id", "primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:28:01.93111053Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -571,12 +571,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells // spells
insta::assert_json_snapshot!(spells.metadata(), @r###" insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "dnd_spells", "uid": "dnd_spells",
"primaryKey": "index", "primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:27:24.312809641Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -617,12 +617,12 @@ pub(crate) mod test {
assert!(indexes.is_empty()); assert!(indexes.is_empty());
// products // products
insta::assert_json_snapshot!(products.metadata(), @r###" insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "products", "uid": "products",
"primaryKey": "sku", "primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:58.70348Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -632,12 +632,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies // movies
insta::assert_json_snapshot!(movies.metadata(), @r###" insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "movies", "uid": "movies",
"primaryKey": "id", "primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:56.455714Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -647,12 +647,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells // spells
insta::assert_json_snapshot!(spells.metadata(), @r###" insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "dnd_spells", "uid": "dnd_spells",
"primaryKey": "index", "primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:59.079906Z" "updatedAt": "[now]"
} }
"###); "###);

View File

@@ -0,0 +1,24 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,38 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@@ -0,0 +1,31 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -56,7 +56,8 @@ pub enum RankingRule {
Desc(String), Desc(String),
} }
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap()); static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
impl FromStr for RankingRule { impl FromStr for RankingRule {
type Err = (); type Err = ();

View File

@@ -46,7 +46,6 @@ pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked; pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry; pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;
// everything related to the errors // everything related to the errors
pub type ResponseError = errors::ResponseError; pub type ResponseError = errors::ResponseError;
@@ -108,11 +107,8 @@ impl V2Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> { pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> { Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V2IndexReader::new( V2IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)), &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
) )
})) }))
} }
@@ -147,41 +143,16 @@ pub struct V2IndexReader {
} }
impl V2IndexReader { impl V2IndexReader {
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> { pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?; let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?; let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata { let metadata = IndexMetadata {
uid: index_uuid.uid.clone(), uid: name,
primary_key: meta.primary_key, primary_key: meta.primary_key,
created_at: created_at.unwrap_or(current_time), // FIXME: Iterate over the whole task queue to find the creation and last update date.
updated_at: updated_at.unwrap_or(current_time), created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
}; };
let ret = V2IndexReader { let ret = V2IndexReader {
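Reviewer note: the new `V2IndexReader::new` above derives an index's `createdAt`/`updatedAt` by scanning the dump's finished tasks instead of stamping the current time. A simplified sketch of that date-inference fold, written defensively around the `Option` comparisons (assuming the `time` crate; the task type is a hypothetical stand-in, not the dump's real `Task`):

```rust
use time::OffsetDateTime;

/// Hypothetical, trimmed-down task record: only what the date inference needs.
struct FinishedTask {
    finished_at: Option<OffsetDateTime>,
    /// true for document additions and settings updates, which "create" content
    creates_content: bool,
}

fn infer_dates(tasks: &[FinishedTask]) -> (OffsetDateTime, OffsetDateTime) {
    let mut created_at: Option<OffsetDateTime> = None;
    let mut updated_at: Option<OffsetDateTime> = None;

    for task in tasks {
        if let Some(finished) = task.finished_at {
            // The earliest content-creating task gives the creation date...
            if task.creates_content && created_at.map_or(true, |c| finished < c) {
                created_at = Some(finished);
            }
            // ...and the latest finished task gives the last-update date.
            if updated_at.map_or(true, |u| finished > u) {
                updated_at = Some(finished);
            }
        }
    }

    // Fall back to "now" when the queue holds nothing usable.
    let now = OffsetDateTime::now_utc();
    (created_at.unwrap_or(now), updated_at.unwrap_or(now))
}

fn main() {
    let now = OffsetDateTime::now_utc();
    let tasks = vec![
        FinishedTask { finished_at: Some(now - time::Duration::hours(2)), creates_content: true },
        FinishedTask { finished_at: Some(now - time::Duration::hours(1)), creates_content: false },
    ];
    let (created, updated) = infer_dates(&tasks);
    assert!(created < updated);
}
```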
@@ -277,12 +248,12 @@ pub(crate) mod test {
assert!(indexes.is_empty()); assert!(indexes.is_empty());
// products // products
insta::assert_json_snapshot!(products.metadata(), @r###" insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "products", "uid": "products",
"primaryKey": "sku", "primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:27:23.951017769Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -292,12 +263,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies // movies
insta::assert_json_snapshot!(movies.metadata(), @r###" insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "movies", "uid": "movies",
"primaryKey": "id", "primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:28:01.93111053Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -322,12 +293,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells // spells
insta::assert_json_snapshot!(spells.metadata(), @r###" insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "dnd_spells", "uid": "dnd_spells",
"primaryKey": "index", "primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z", "createdAt": "[now]",
"updatedAt": "2022-10-09T20:27:24.312809641Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -369,12 +340,12 @@ pub(crate) mod test {
assert!(indexes.is_empty()); assert!(indexes.is_empty());
// products // products
insta::assert_json_snapshot!(products.metadata(), @r###" insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "products", "uid": "products",
"primaryKey": "sku", "primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:58.70348Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -384,12 +355,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies // movies
insta::assert_json_snapshot!(movies.metadata(), @r###" insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "movies", "uid": "movies",
"primaryKey": "id", "primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:56.455714Z" "updatedAt": "[now]"
} }
"###); "###);
@@ -399,12 +370,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720"); meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells // spells
insta::assert_json_snapshot!(spells.metadata(), @r###" insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{ {
"uid": "dnd_spells", "uid": "dnd_spells",
"primaryKey": "index", "primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z", "createdAt": "[now]",
"updatedAt": "2023-01-30T16:25:59.079906Z" "updatedAt": "[now]"
} }
"###); "###);

View File

@@ -227,14 +227,4 @@ impl UpdateStatus {
_ => None, _ => None,
} }
} }
pub fn finished_at(&self) -> Option<OffsetDateTime> {
match self {
UpdateStatus::Processing(_) => None,
UpdateStatus::Enqueued(_) => None,
UpdateStatus::Processed(u) => Some(u.processed_at),
UpdateStatus::Aborted(_) => None,
UpdateStatus::Failed(u) => Some(u.failed_at),
}
}
} }

View File

@@ -1,6 +1,5 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)] #[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code { pub enum Code {
// index related error // index related error

View File

@@ -95,7 +95,6 @@ impl fmt::Display for ErrorType {
} }
} }
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)] #[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code { pub enum Code {
// index related error // index related error

View File

@@ -31,7 +31,6 @@ impl ResponseError {
} }
} }
#[allow(clippy::enum_variant_names)]
#[derive(Deserialize, Debug, Clone, Copy)] #[derive(Deserialize, Debug, Clone, Copy)]
#[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, derive(serde::Serialize))]
pub enum Code { pub enum Code {

View File

@@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind}; use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path; use std::path::Path;
use log::debug;
pub use meilisearch_types::milli; pub use meilisearch_types::milli;
use tempfile::TempDir; use tempfile::TempDir;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid; use uuid::Uuid;
use super::Document; use super::Document;

View File

@@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
faux = "0.1.10" faux = "0.1.9"

View File

@@ -13,8 +13,8 @@ license.workspace = true
[dependencies] [dependencies]
nom = "7.1.3" nom = "7.1.3"
nom_locate = "4.2.0" nom_locate = "4.1.0"
unescaper = "0.1.3" unescaper = "0.1.2"
[dev-dependencies] [dev-dependencies]
insta = "1.34.0" insta = "1.29.0"

View File

@@ -564,10 +564,10 @@ pub mod tests {
#[test] #[test]
fn parse_escaped() { fn parse_escaped() {
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#); insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#); insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#); insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#); insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences // but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}"); insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
} }

View File

@@ -270,8 +270,8 @@ pub mod test {
("aaaa", "", rtok("", "aaaa"), "aaaa"), ("aaaa", "", rtok("", "aaaa"), "aaaa"),
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"), (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#), (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"), (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"), (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#), (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#), (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
]; ];
@@ -301,12 +301,12 @@ pub mod test {
); );
// simple quote // simple quote
assert_eq!( assert_eq!(
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''), unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
r#"Hello 'World'"#.to_string() r#"Hello 'World'"#.to_string()
); );
assert_eq!( assert_eq!(
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''), unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
r"Hello \\'World\\'".to_string() r#"Hello \\'World\\'"#.to_string()
); );
} }
@@ -335,19 +335,19 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false), ("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false), ("I'm tamo", "I", false),
// escaped thing but not quote // escaped thing but not quote
(r#""\\""#, r"\", true), (r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r"\\\", true), (r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r"aa\aa", true), (r#""aa\\aa""#, r#"aa\aa"#, true),
// with double quote // with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true), (r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true), (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true), (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true), (r#""\"\"""#, r#""""#, true),
// with simple quote // with simple quote
(r"'Hello \'world\''", r#"Hello 'world'"#, true), (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true), (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true), (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r"'\'\''", r#"''"#, true), (r#"'\'\''"#, r#"''"#, true),
]; ];
for (input, expected, escaped) in test_case { for (input, expected, escaped) in test_case {

View File

@@ -11,10 +11,10 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] } arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.4.17", features = ["derive"] } clap = { version = "4.3.0", features = ["derive"] }
fastrand = "2.0.1" fastrand = "2.0.0"
milli = { path = "../milli" } milli = { path = "../milli" }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.9.0" tempfile = "3.5.0"

View File

@@ -113,7 +113,7 @@ fn main() {
index.documents(&wtxn, res.documents_ids).unwrap(); index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed); progression.fetch_add(1, Ordering::Relaxed);
} }
wtxn.abort(); wtxn.abort().unwrap();
}); });
if let err @ Err(_) = handle.join() { if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed); stop.store(true, Ordering::Relaxed);

View File

@@ -11,36 +11,30 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.79" anyhow = "1.0.70"
bincode = "1.3.3" bincode = "1.3.3"
csv = "1.3.0" csv = "1.2.1"
derive_builder = "0.12.0" derive_builder = "0.12.0"
dump = { path = "../dump" } dump = { path = "../dump" }
enum-iterator = "1.5.0" enum-iterator = "1.4.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0" page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] } puffin = "0.16.0"
roaring = { version = "0.10.2", features = ["serde"] } roaring = { path = "../../roaring-rs", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1" synchronoise = "1.0.1"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = [ time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
"serde-well-known", uuid = { version = "1.3.1", features = ["serde", "v4"] }
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"
crossbeam = "0.8.4" crossbeam = "0.8.2"
insta = { version = "1.34.0", features = ["json", "redactions"] } insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@@ -19,19 +19,20 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet}; use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File}; use std::fs::{self, File};
use std::io::BufWriter; use std::io::BufWriter;
use dump::IndexMetadata; use dump::IndexMetadata;
use log::{debug, error, info};
use meilisearch_types::error::Code; use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{ use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
}; };
use meilisearch_types::milli::{self, Filter}; use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@@ -42,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind}; use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task}; use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId}; use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time. /// Represents a combination of tasks that can all be processed at the same time.
/// ///
@@ -59,7 +60,7 @@ pub(crate) enum Batch {
/// The list of tasks that were processing when this task cancelation appeared. /// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap, previous_processing_tasks: RoaringBitmap,
}, },
TaskDeletions(Vec<Task>), TaskDeletion(Task),
SnapshotCreation(Vec<Task>), SnapshotCreation(Vec<Task>),
Dump(Task), Dump(Task),
IndexOperation { IndexOperation {
@@ -103,6 +104,12 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>, operations: Vec<DocumentOperation>,
tasks: Vec<Task>, tasks: Vec<Task>,
}, },
DocumentDeletion {
index_uid: String,
// The vec associated with each document deletion tasks.
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter { IndexDocumentDeletionByFilter {
index_uid: String, index_uid: String,
task: Task, task: Task,
@@ -145,14 +152,16 @@ impl Batch {
pub fn ids(&self) -> Vec<TaskId> { pub fn ids(&self) -> Vec<TaskId> {
match self { match self {
Batch::TaskCancelation { task, .. } Batch::TaskCancelation { task, .. }
| Batch::TaskDeletion(task)
| Batch::Dump(task) | Batch::Dump(task)
| Batch::IndexCreation { task, .. } | Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid], | Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
| Batch::TaskDeletions(tasks) tasks.iter().map(|task| task.uid).collect()
| Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(), }
Batch::IndexOperation { op, .. } => match op { Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. } IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. } | IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => { | IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect() tasks.iter().map(|task| task.uid).collect()
@@ -178,7 +187,7 @@ impl Batch {
use Batch::*; use Batch::*;
match self { match self {
TaskCancelation { .. } TaskCancelation { .. }
| TaskDeletions(_) | TaskDeletion(_)
| SnapshotCreation(_) | SnapshotCreation(_)
| Dump(_) | Dump(_)
| IndexSwap { .. } => None, | IndexSwap { .. } => None,
@@ -190,33 +199,11 @@ impl Batch {
} }
} }
impl fmt::Display for Batch {
/// A text used when we debug the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
impl IndexOperation { impl IndexOperation {
pub fn index_uid(&self) -> &str { pub fn index_uid(&self) -> &str {
match self { match self {
IndexOperation::DocumentOperation { index_uid, .. } IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. } | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. } | IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. } | IndexOperation::Settings { index_uid, .. }
@@ -226,27 +213,6 @@ impl IndexOperation {
} }
} }
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
impl IndexScheduler { impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
/// ///
@@ -334,27 +300,18 @@ impl IndexScheduler {
BatchKind::DocumentDeletion { deletion_ids } => { BatchKind::DocumentDeletion { deletion_ids } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?; let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let mut operations = Vec::with_capacity(tasks.len()); let mut documents = Vec::new();
let mut documents_counts = Vec::with_capacity(tasks.len());
for task in &tasks { for task in &tasks {
match task.kind { match task.kind {
KindWithContent::DocumentDeletion { ref documents_ids, .. } => { KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
operations.push(DocumentOperation::Delete(documents_ids.clone())); documents.push(documents_ids.clone())
documents_counts.push(documents_ids.len() as u64);
} }
_ => unreachable!(), _ => unreachable!(),
} }
} }
Ok(Some(Batch::IndexOperation { Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation { op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
index_uid,
primary_key: None,
method: IndexDocumentsMethod::ReplaceDocuments,
documents_counts,
operations,
tasks,
},
must_create_index, must_create_index,
})) }))
} }
@@ -513,7 +470,6 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process. /// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process. /// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index. /// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> { pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)] #[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
@@ -538,9 +494,9 @@ impl IndexScheduler {
// 2. we get the next task to delete // 2. we get the next task to delete
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() { if let Some(task_id) = to_delete.min() {
let tasks = self.get_existing_tasks(rtxn, to_delete)?; let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
return Ok(Some(Batch::TaskDeletions(tasks))); return Ok(Some(Batch::TaskDeletion(task)));
} }
// 3. we batch the snapshot. // 3. we batch the snapshot.
@@ -583,9 +539,7 @@ impl IndexScheduler {
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued; let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
// If autobatching is disabled we only take one task at a time. // If autobatching is disabled we only take one task at a time.
// Otherwise, we take only a maximum of tasks to create batches. let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
let tasks_limit =
if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };
let enqueued = index_tasks let enqueued = index_tasks
.into_iter() .into_iter()
@@ -619,7 +573,6 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned /// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of its date fields /// list is updated accordingly, with the exception of its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> { pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)] #[cfg(test)]
{ {
@@ -628,7 +581,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch); self.breakpoint(crate::Breakpoint::InsideProcessBatch);
} }
puffin::profile_function!(batch.to_string()); puffin::profile_function!(format!("{:?}", batch));
match batch { match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@@ -669,10 +622,9 @@ impl IndexScheduler {
Ok(()) => { Ok(()) => {
for content_uuid in canceled_tasks_content_uuids { for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) { if let Err(error) = self.delete_update_file(content_uuid) {
tracing::error!( error!(
file_content_uuid = %content_uuid, "We failed deleting the content file indentified as {}: {}",
%error, content_uuid, error
"Failed deleting content file"
) )
} }
} }
@@ -682,43 +634,31 @@ impl IndexScheduler {
Ok(vec![task]) Ok(vec![task])
} }
Batch::TaskDeletions(mut tasks) => { Batch::TaskDeletion(mut task) => {
// 1. Retrieve the tasks that matched the query at enqueue-time. // 1. Retrieve the tasks that matched the query at enqueue-time.
let mut matched_tasks = RoaringBitmap::new(); let matched_tasks =
for task in tasks.iter() {
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
matched_tasks |= tasks; tasks
} else { } else {
unreachable!() unreachable!()
}
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
unreachable!()
}; };
let deleted_tasks_count = deleted_tasks.intersection_len(tasks); let mut wtxn = self.env.write_txn()?;
deleted_tasks -= tasks; let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
match &mut task.details { task.status = Status::Succeeded;
Some(Details::TaskDeletion { match &mut task.details {
matched_tasks: _, Some(Details::TaskDeletion {
deleted_tasks, matched_tasks: _,
original_filter: _, deleted_tasks,
}) => { original_filter: _,
*deleted_tasks = Some(deleted_tasks_count); }) => {
} *deleted_tasks = Some(deleted_tasks_count);
_ => unreachable!(),
} }
_ => unreachable!(),
} }
Ok(tasks) wtxn.commit()?;
Ok(vec![task])
} }
Batch::SnapshotCreation(mut tasks) => { Batch::SnapshotCreation(mut tasks) => {
fs::create_dir_all(&self.snapshots_path)?; fs::create_dir_all(&self.snapshots_path)?;
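Reviewer note: with `Batch::TaskDeletions` now carrying several task-deletion tasks at once, the per-task `deletedTasks` detail is computed by intersecting each task's matched set with the set that was actually deleted, then subtracting it so nothing is counted twice. A hedged sketch of that accounting, assuming the `roaring` crate (names are illustrative):

```rust
use roaring::RoaringBitmap;

/// Illustrative only: given the set of tasks actually deleted and, per deletion
/// request, the set it asked to delete, report one count per request.
fn per_request_deleted_counts(
    mut deleted: RoaringBitmap,
    requests: &[RoaringBitmap],
) -> Vec<u64> {
    requests
        .iter()
        .map(|matched| {
            // How many of the requested tasks are still in the deleted set...
            let count = deleted.intersection_len(matched);
            // ...then remove them so a task deleted by an earlier request
            // is not counted again by a later one.
            deleted -= matched;
            count
        })
        .collect()
}

fn main() {
    let deleted: RoaringBitmap = (0..10u32).collect();
    let first: RoaringBitmap = (0..5u32).collect();
    let second: RoaringBitmap = (3..12u32).collect();
    assert_eq!(per_request_deleted_counts(deleted, &[first, second]), vec![5, 5]);
}
```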
@@ -730,7 +670,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env // 2. Snapshot the index-scheduler LMDB env
// //
// When we call copy_to_file, LMDB opens a read transaction by itself, // When we call copy_to_path, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know // we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy // the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks. // of the env and the new transaction we open to retrieve the enqueued tasks.
@@ -743,7 +683,7 @@ impl IndexScheduler {
// 2.1 First copy the LMDB env of the index-scheduler // 2.1 First copy the LMDB env of the index-scheduler
let dst = temp_snapshot_dir.path().join("tasks"); let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler // 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@@ -768,7 +708,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?; let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
} }
drop(rtxn); drop(rtxn);
@@ -781,7 +721,7 @@ impl IndexScheduler {
.map_size(1024 * 1024 * 1024) // 1 GiB .map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2) .max_dbs(2)
.open(&self.auth_path)?; .open(&self.auth_path)?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot // 5. Copy and tarball the flat snapshot
// 5.1 Find the original name of the database // 5.1 Find the original name of the database
@@ -837,10 +777,6 @@ impl IndexScheduler {
// 2. dump the tasks // 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?; let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? { for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?; let (_, mut t) = ret?;
let status = t.status; let status = t.status;
let content_file = t.content_uuid(); let content_file = t.content_uuid();
@@ -861,9 +797,6 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file { if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued { if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?; let content_file = self.file_store.get_update(content_file)?;
@@ -903,9 +836,6 @@ impl IndexScheduler {
// 3.1. Dump the documents // 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? { for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?; let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?; index_dumper.push_document(&document)?;
@@ -918,16 +848,13 @@ impl IndexScheduler {
})?; })?;
// 4. Dump experimental feature settings // 4. Dump experimental feature settings
let features = self.features().runtime_features(); let features = self.features()?.runtime_features();
dump.create_experimental_features(features)?; dump.create_experimental_features(features)?;
let dump_uid = started_at.format(format_description!( let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap(); )).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid)); let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?; let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?; dump.persist_to(BufWriter::new(file))?;
@@ -948,10 +875,6 @@ impl IndexScheduler {
self.index_mapper.index(&rtxn, &index_uid)? self.index_mapper.index(&rtxn, &index_uid)?
}; };
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?; let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
index_wtxn.commit()?; index_wtxn.commit()?;
@@ -971,10 +894,7 @@ impl IndexScheduler {
match res { match res {
Ok(_) => (), Ok(_) => (),
Err(e) => tracing::error!( Err(e) => error!("Could not write the stats of the index {}", e),
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
} }
Ok(tasks) Ok(tasks)
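Reviewer note: the error reporting in this hunk moves from `log`-style string interpolation to `tracing` structured fields, so the error travels as its own field and stays machine-readable for subscribers. A minimal sketch of the pattern (nothing is printed unless a subscriber is installed):

```rust
use std::io;

fn report(e: &io::Error) {
    // The error is recorded as a structured field rather than being formatted
    // into the message, matching the `error = &e as &dyn std::error::Error`
    // form used in the diff above.
    tracing::error!(error = e as &dyn std::error::Error, "Could not write the stats of the index");
}

fn main() {
    report(&io::Error::new(io::ErrorKind::Other, "disk full"));
}
```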
@@ -1002,7 +922,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key); builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
builder.execute( builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step), |indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
index_wtxn.commit()?; index_wtxn.commit()?;
@@ -1029,10 +949,7 @@ impl IndexScheduler {
match res { match res {
Ok(_) => (), Ok(_) => (),
Err(e) => tracing::error!( Err(e) => error!("Could not write the stats of the index {}", e),
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
} }
Ok(vec![task]) Ok(vec![task])
@@ -1127,7 +1044,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids { for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs)); swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &task_id, &task)?; self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
} }
// 4. remove the task from indexuid = before_name // 4. remove the task from indexuid = before_name
@@ -1151,14 +1068,9 @@ impl IndexScheduler {
/// ///
/// ## Return /// ## Return
/// The list of processed tasks. /// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>( fn apply_index_operation<'i>(
&self, &self,
index_wtxn: &mut RwTxn<'i>, index_wtxn: &mut RwTxn<'i, '_>,
index: &'i Index, index: &'i Index,
operation: IndexOperation, operation: IndexOperation,
) -> Result<Vec<Task>> { ) -> Result<Vec<Task>> {
@@ -1216,7 +1128,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config); milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key); builder.set_primary_key(primary_key);
builder.execute( builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step), |indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(), || must_stop_processing.clone().get(),
)?; )?;
primary_key_has_been_set = true; primary_key_has_been_set = true;
@@ -1226,16 +1138,12 @@ impl IndexScheduler {
let config = IndexDocumentsConfig { update_method: method, ..Default::default() }; let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new( let mut builder = milli::update::IndexDocuments::new(
index_wtxn, index_wtxn,
index, index,
indexer_config, indexer_config,
config, config,
|indexing_step| tracing::trace!(?indexing_step, "Update"), |indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
@@ -1248,8 +1156,6 @@ impl IndexScheduler {
let (new_builder, user_result) = builder.add_documents(reader)?; let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder; builder = new_builder;
builder = builder.with_embedders(embedders.clone());
let received_documents = let received_documents =
if let Some(Details::DocumentAdditionOrUpdate { if let Some(Details::DocumentAdditionOrUpdate {
received_documents, received_documents,
@@ -1284,8 +1190,7 @@ impl IndexScheduler {
let (new_builder, user_result) = let (new_builder, user_result) =
builder.remove_documents(document_ids)?; builder.remove_documents(document_ids)?;
builder = new_builder; builder = new_builder;
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
let provided_ids = let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) = if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details task.details
@@ -1296,18 +1201,30 @@ impl IndexScheduler {
unreachable!(); unreachable!();
}; };
task.status = Status::Succeeded; match user_result {
task.details = Some(Details::DocumentDeletion { Ok(count) => {
provided_ids, task.status = Status::Succeeded;
deleted_documents: Some(count), task.details = Some(Details::DocumentDeletion {
}); provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
} }
} }
} }
if !tasks.iter().all(|res| res.error.is_some()) { if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?; let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, "document indexing done"); info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set { } else if primary_key_has_been_set {
// Everything failed but we've set a primary key. // Everything failed but we've set a primary key.
// We need to remove it. // We need to remove it.
@@ -1315,13 +1232,31 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config); milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key(); builder.reset_primary_key();
builder.execute( builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step), |indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(), || must_stop_processing.clone().get(),
)?; )?;
} }
Ok(tasks) Ok(tasks)
} }
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
documents.iter().flatten().for_each(|id| {
builder.delete_external_id(id);
});
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
for (task, documents) in tasks.iter_mut().zip(documents) {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents.len(),
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
});
}
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter = let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } = if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@@ -1331,13 +1266,7 @@ impl IndexScheduler {
} else { } else {
unreachable!() unreachable!()
}; };
let deleted_documents = delete_document_by_filter( let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let original_filter = if let Some(Details::DocumentDeletionByFilter { let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter, original_filter,
deleted_documents: _, deleted_documents: _,
@@ -1385,7 +1314,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
builder.execute( builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step), |indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
@@ -1459,11 +1388,7 @@ impl IndexScheduler {
/// Delete each given task from all the databases (if it is deleteable). /// Delete each given task from all the databases (if it is deleteable).
/// ///
/// Return the number of tasks that were actually deleted. /// Return the number of tasks that were actually deleted.
fn delete_matched_tasks( fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
) -> Result<RoaringBitmap> {
// 1. Remove from this list the tasks that we are not allowed to delete // 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1515,9 +1440,10 @@ impl IndexScheduler {
} }
for task in to_delete_tasks.iter() { for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &task)?; self.all_tasks.delete(wtxn, &BEU32::new(task))?;
} }
for canceled_by in affected_canceled_by { for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks; tasks -= &to_delete_tasks;
if tasks.is_empty() { if tasks.is_empty() {
@@ -1528,7 +1454,7 @@ impl IndexScheduler {
} }
} }
Ok(to_delete_tasks) Ok(to_delete_tasks.len())
} }
/// Cancel each given task from all the databases (if it is cancelable). /// Cancel each given task from all the databases (if it is cancelable).
@@ -1565,17 +1491,15 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed()); task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?; self.update_task(wtxn, &task)?;
} }
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?; self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
Ok(content_files_to_delete) Ok(content_files_to_delete)
} }
} }
fn delete_document_by_filter<'a>( fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a>, wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value, filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index, index: &'a Index,
) -> Result<u64> { ) -> Result<u64> {
let filter = Filter::from_json(filter)?; let filter = Filter::from_json(filter)?;
@@ -1586,26 +1510,9 @@ fn delete_document_by_filter<'a>(
} }
e => e.into(), e => e.into(),
})?; })?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
let config = IndexDocumentsConfig { delete_operation.delete_documents(&candidates);
update_method: IndexDocumentsMethod::ReplaceDocuments, delete_operation.execute().map(|result| result.deleted_documents)?
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
builder = new_builder;
let _ = builder.execute()?;
count
} else { } else {
0 0
}) })

View File

@@ -108,8 +108,6 @@ pub enum Error {
TaskDeletionWithEmptyQuery, TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery, TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)] #[error(transparent)]
Dump(#[from] dump::Error), Dump(#[from] dump::Error),
@@ -177,7 +175,6 @@ impl Error {
| Error::TaskNotFound(_) | Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery | Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery | Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_) | Error::Dump(_)
| Error::Heed(_) | Error::Heed(_)
| Error::Milli(_) | Error::Milli(_)
@@ -239,9 +236,6 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal, Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal, Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)] #[cfg(test)]
Error::PlannedFailure => Code::Internal, Error::PlannedFailure => Code::Internal,
} }

View File

@@ -1,8 +1,6 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use crate::error::FeatureNotEnabledError; use crate::error::FeatureNotEnabledError;
use crate::Result; use crate::Result;
@@ -11,94 +9,73 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct FeatureData { pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>, runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>, instance: InstanceTogglableFeatures,
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct RoFeatures { pub struct RoFeatures {
runtime: RuntimeTogglableFeatures, runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
} }
impl RoFeatures { impl RoFeatures {
fn new(data: &FeatureData) -> Self { fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(); let runtime = data.runtime_features(txn)?;
Self { runtime } Ok(Self { runtime, instance: data.instance })
} }
pub fn runtime_features(&self) -> RuntimeTogglableFeatures { pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
self.runtime self.runtime
} }
pub fn check_score_details(&self) -> Result<()> {
if self.runtime.score_details {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Computing score details",
feature: "score details",
issue_link: "https://github.com/meilisearch/product/discussions/674",
}
.into())
}
}
pub fn check_metrics(&self) -> Result<()> { pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics { if self.instance.metrics {
Ok(()) Ok(())
} else { } else {
Err(FeatureNotEnabledError { Err(FeatureNotEnabledError {
disabled_action: "Getting metrics", disabled_action: "Getting metrics",
feature: "metrics", feature: "metrics",
issue_link: "https://github.com/meilisearch/product/discussions/625", issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
} }
.into()) .into())
} }
} }
pub fn check_logs_route(&self) -> Result<()> { pub fn check_vector(&self) -> Result<()> {
if self.runtime.logs_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Modifying logs through the `/logs/*` routes",
feature: "logs route",
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
}
.into())
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store { if self.runtime.vector_store {
Ok(()) Ok(())
} else { } else {
Err(FeatureNotEnabledError { Err(FeatureNotEnabledError {
disabled_action, disabled_action: "Passing `vector` as a query parameter",
feature: "vector store", feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677", issue_link: "https://github.com/meilisearch/product/discussions/677",
} }
.into()) .into())
} }
} }
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
} }
impl FeatureData { impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> { pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?; let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?; wtxn.commit()?;
let txn = env.read_txn()?; Ok(Self { runtime: runtime_features, instance: instance_features })
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
} }
pub fn put_runtime_features( pub fn put_runtime_features(
@@ -106,25 +83,16 @@ impl FeatureData {
mut wtxn: RwTxn, mut wtxn: RwTxn,
features: RuntimeTogglableFeatures, features: RuntimeTogglableFeatures,
) -> Result<()> { ) -> Result<()> {
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?; self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?; wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for an assignment here.
let mut toggled_features = self.runtime.write().unwrap();
*toggled_features = features;
Ok(()) Ok(())
} }
fn runtime_features(&self) -> RuntimeTogglableFeatures { fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
// sound to unwrap, the lock will only fail if: Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for copying the data here
*self.runtime.read().unwrap()
} }
pub fn features(&self) -> RoFeatures { pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(self) RoFeatures::new(txn, self)
} }
} }
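Reviewer note: the reworked `FeatureData` above persists runtime features in LMDB but also mirrors them into an `Arc<RwLock<_>>`, so `features()` no longer needs a read transaction. A small sketch of that persist-then-cache pattern, with the database replaced by a plain map (every name here is illustrative, not the scheduler's real API):

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

#[derive(Clone, Copy, Default, Debug)]
struct RuntimeTogglableFeatures {
    metrics: bool,
    logs_route: bool,
    vector_store: bool,
}

/// Illustrative stand-in for the LMDB database: persistence is a plain map here.
#[derive(Default)]
struct FakeDb(HashMap<&'static str, RuntimeTogglableFeatures>);

struct FeatureData {
    persisted: FakeDb,
    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
}

impl FeatureData {
    fn put_runtime_features(&mut self, features: RuntimeTogglableFeatures) {
        // 1. persist, 2. update the in-memory copy that readers consult.
        self.persisted.0.insert("experimental-features", features);
        *self.runtime.write().unwrap() = features;
    }

    fn runtime_features(&self) -> RuntimeTogglableFeatures {
        // Readers never open a transaction: they copy the cached value.
        *self.runtime.read().unwrap()
    }
}

fn main() {
    let mut data = FeatureData { persisted: FakeDb::default(), runtime: Arc::default() };
    data.put_runtime_features(RuntimeTogglableFeatures { metrics: true, ..Default::default() });
    assert!(data.runtime_features().metrics);
}
```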

View File

@@ -1,8 +1,12 @@
/// the map size to use when we don't succeed in reading it in indexes.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::path::Path; use std::path::Path;
use std::time::Duration; use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index; use meilisearch_types::milli::Index;
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::Uuid; use uuid::Uuid;
@@ -232,7 +236,7 @@ impl IndexMap {
enable_mdb_writemap: bool, enable_mdb_writemap: bool,
map_size_growth: usize, map_size_growth: usize,
) { ) {
let map_size = index.map_size() + map_size_growth; let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing(); let closing_event = index.prepare_for_closing();
let generation = self.next_generation(); let generation = self.next_generation();
self.unavailable.insert( self.unavailable.insert(
@@ -305,7 +309,7 @@ fn create_or_open_index(
options.map_size(clamp_to_page_size(map_size)); options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024); options.max_readers(1024);
if enable_mdb_writemap { if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) }; unsafe { options.flag(Flags::MdbWriteMap) };
} }
if let Some((created, updated)) = date { if let Some((created, updated)) = date {
@@ -384,7 +388,7 @@ mod tests {
fn assert_index_size(index: Index, expected: usize) { fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected); let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size(); let index_map_size = index.map_size().unwrap();
assert_eq!(index_map_size, expected); assert_eq!(index_map_size, expected);
} }
} }
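Note: the two sides of this hunk target different heed APIs: one sets MDB_WRITEMAP through `EnvFlags::WRITE_MAP` and treats `Index::map_size()` as infallible, the other goes through `flags::Flags::MdbWriteMap` and a fallible `map_size()`. A rough sketch of configuring the environment options with the `EnvFlags` variant, assuming the heed version re-exported by `meilisearch_types` exposes it:

```rust
use meilisearch_types::heed::{EnvFlags, EnvOpenOptions};

/// Sketch: configure an LMDB environment the way the index map does. Whether
/// to enable MDB_WRITEMAP stays a caller decision, since the flag trades the
/// usual write-protection guarantees for fewer copies on write-heavy workloads.
fn env_options(map_size: usize, enable_mdb_writemap: bool) -> EnvOpenOptions {
    let mut options = EnvOpenOptions::new();
    options.map_size(map_size);
    options.max_readers(1024);
    if enable_mdb_writemap {
        // SAFETY: WRITE_MAP maps the data file writable in this process; the
        // caller must accept the weaker corruption guarantees that implies.
        unsafe { options.flags(EnvFlags::WRITE_MAP) };
    }
    options
    // `options.open(path)` is then called as in `create_or_open_index`.
}
```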


@@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration; use std::time::Duration;
use std::{fs, thread}; use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index}; use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid; use uuid::Uuid;
use self::index_map::IndexMap; use self::index_map::IndexMap;
@@ -69,10 +69,6 @@ pub struct IndexMapper {
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool, enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>, pub indexer_config: Arc<IndexerConfig>,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
} }
/// Whether the index is available for use or is forbidden to be inserted back in the index map /// Whether the index is available for use or is forbidden to be inserted back in the index map
@@ -155,7 +151,6 @@ impl IndexMapper {
index_growth_amount, index_growth_amount,
enable_mdb_writemap, enable_mdb_writemap,
indexer_config: Arc::new(indexer_config), indexer_config: Arc::new(indexer_config),
currently_updating_index: Default::default(),
}) })
} }
@@ -308,14 +303,6 @@ impl IndexMapper {
/// Return an index, may open it if it wasn't already opened. /// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> { pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let uuid = self let uuid = self
.index_mapping .index_mapping
.get(rtxn, name)? .get(rtxn, name)?
@@ -487,8 +474,4 @@ impl IndexMapper {
pub fn indexer_config(&self) -> &IndexerConfig { pub fn indexer_config(&self) -> &IndexerConfig {
&self.indexer_config &self.indexer_config
} }
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
*self.currently_updating_index.write().unwrap() = index;
}
} }
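Note: one side of this hunk adds a `currently_updating_index` slot so that search threads can reuse the handle a long-running batch already holds instead of reopening the index through the mapping database. A simplified, generic sketch of that fast path (not the real `IndexMapper` types):

```rust
use std::sync::{Arc, RwLock};

/// Sketch: long-running batches publish the index they hold, readers check it
/// before falling back to the slower lookup.
struct Mapper<I: Clone> {
    currently_updating: Arc<RwLock<Option<(String, I)>>>,
}

impl<I: Clone> Mapper<I> {
    fn new() -> Self {
        Mapper { currently_updating: Arc::new(RwLock::new(None)) }
    }

    /// Called by the scheduler before/after processing a batch on `name`.
    fn set_currently_updating(&self, entry: Option<(String, I)>) {
        *self.currently_updating.write().unwrap() = entry;
    }

    /// Called from search: returns the published handle when it matches.
    fn index(&self, name: &str, slow_lookup: impl FnOnce() -> I) -> I {
        if let Some((current_name, index)) = self.currently_updating.read().unwrap().as_ref() {
            if current_name == name {
                return index.clone();
            }
        }
        slow_lookup()
    }
}

fn main() {
    let mapper = Mapper::new();
    mapper.set_currently_updating(Some(("movies".to_string(), 42u32)));
    assert_eq!(mapper.index("movies", || 0), 42); // fast path
    assert_eq!(mapper.index("books", || 7), 7);   // falls back to the lookup
}
```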


@@ -1,7 +1,7 @@
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt::Write; use std::fmt::Write;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn}; use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task}; use meilisearch_types::tasks::{Details, Task};
@@ -30,19 +30,14 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper, index_mapper,
features: _, features: _,
max_number_of_tasks: _, max_number_of_tasks: _,
max_number_of_batched_tasks: _,
puffin_frame: _,
wake_up: _, wake_up: _,
dumps_path: _, dumps_path: _,
snapshots_path: _, snapshots_path: _,
auth_path: _, auth_path: _,
version_file_path: _, version_file_path: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _, test_breakpoint_sdr: _,
planned_failures: _, planned_failures: _,
run_loop_iteration: _, run_loop_iteration: _,
embedders: _,
} = scheduler; } = scheduler;
let rtxn = env.read_txn().unwrap(); let rtxn = env.read_txn().unwrap();
@@ -118,7 +113,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap snap
} }
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String { pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {
@@ -128,7 +123,10 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) ->
snap snap
} }
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String { pub fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {
@@ -248,7 +246,10 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
} }
snap snap
} }
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String { pub fn snapshot_canceled_by(
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {


@@ -27,14 +27,12 @@ mod index_mapper;
mod insta_snapshot; mod insta_snapshot;
mod lru; mod lru;
mod utils; mod utils;
pub mod uuid_codec; mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32; pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicBool;
@@ -46,20 +44,14 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error; pub use error::Error;
pub use features::RoFeatures; pub use features::RoFeatures;
use file_store::FileStore; use file_store::FileStore;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use synchronoise::SignalEvent; use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339; use time::format_description::well_known::Rfc3339;
@@ -70,7 +62,8 @@ use uuid::Uuid;
use crate::index_mapper::IndexMapper; use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size}; use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 = I128<BE>; pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
/// ///
@@ -174,8 +167,8 @@ impl ProcessingTasks {
} }
/// Set the processing tasks to an empty list /// Set the processing tasks to an empty list
fn stop_processing(&mut self) -> RoaringBitmap { fn stop_processing(&mut self) {
std::mem::take(&mut self.processing) self.processing = RoaringBitmap::new();
} }
/// Returns `true` if there, at least, is one task that is currently processing that we must stop. /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@@ -245,10 +238,6 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf, pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps. /// The path to the folder containing the dumps.
pub dumps_path: PathBuf, pub dumps_path: PathBuf,
/// The URL on which we must send the tasks statuses
pub webhook_url: Option<String>,
/// The value we will send into the Authorization HTTP header on the webhook URL
pub webhook_authorization_header: Option<String>,
/// The maximum size, in bytes, of the task index. /// The maximum size, in bytes, of the task index.
pub task_db_size: usize, pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index. /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -267,9 +256,6 @@ pub struct IndexSchedulerOptions {
/// The maximum number of tasks stored in the task queue before starting /// The maximum number of tasks stored in the task queue before starting
/// to auto schedule task deletions. /// to auto schedule task deletions.
pub max_number_of_tasks: usize, pub max_number_of_tasks: usize,
/// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined number of tasks at once.
pub max_number_of_batched_tasks: usize,
/// The experimental features enabled for this instance. /// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures, pub instance_features: InstanceTogglableFeatures,
} }
@@ -290,7 +276,7 @@ pub struct IndexScheduler {
pub(crate) file_store: FileStore, pub(crate) file_store: FileStore,
// The main database, it contains all the tasks accessible by their Id. // The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>, pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
/// All the tasks ids grouped by their status. /// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database. // TODO we should not be able to serialize a `Status::Processing` in this database.
@@ -301,16 +287,16 @@ pub struct IndexScheduler {
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>, pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid /// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>, pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date /// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>, pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date /// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>, pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date /// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>, pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes. /// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper, pub(crate) index_mapper: IndexMapper,
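Note: one side of these declarations wraps integer keys in `OwnedType<BEU32>` / `OwnedType<BEI128>` and converts with `BEU32::new(..)` / `.get()` at every call site, while the other uses the big-endian integer codecs directly, so plain `u32` and `i128` values are the keys. A hedged sketch of what a put then looks like, with `String` standing in for the real task payload type and assuming the re-exported heed provides `PutFlags`:

```rust
use meilisearch_types::heed::types::SerdeJson;
use meilisearch_types::heed::{Database, PutFlags, RwTxn};
use meilisearch_types::milli::BEU32;

/// Sketch: with the integer codec used as the key type directly, a plain `u32`
/// task uid is the key, and because task uids only ever grow, the entry can be
/// appended to the end of the tree with `PutFlags::APPEND` instead of a
/// regular insert.
fn store_task(
    all_tasks: Database<BEU32, SerdeJson<String>>,
    wtxn: &mut RwTxn,
    task_uid: u32,
    payload: &String,
) -> meilisearch_types::heed::Result<()> {
    all_tasks.put_with_flags(wtxn, PutFlags::APPEND, &task_uid, payload)?;
    Ok(())
}
```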
@@ -328,17 +314,6 @@ pub struct IndexScheduler {
/// the finished tasks automatically. /// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize, pub(crate) max_number_of_tasks: usize,
/// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize,
    /// The webhook URL we should send tasks to after processing every batch.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps. /// The path used to create the dumps.
pub(crate) dumps_path: PathBuf, pub(crate) dumps_path: PathBuf,
@@ -351,8 +326,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
// ================= test // ================= test
// The next entry is dedicated to the tests. // The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler. /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -391,15 +364,10 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(), wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled, autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks, max_number_of_tasks: self.max_number_of_tasks,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(), snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(), dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(), auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
embedders: self.embedders.clone(),
#[cfg(test)] #[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)] #[cfg(test)]
@@ -489,17 +457,12 @@ impl IndexScheduler {
env, env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)), wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled, autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks, max_number_of_tasks: options.max_number_of_tasks,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
dumps_path: options.dumps_path, dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path, snapshots_path: options.snapshots_path,
auth_path: options.auth_path, auth_path: options.auth_path,
version_file_path: options.version_file_path, version_file_path: options.version_file_path,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
embedders: Default::default(),
#[cfg(test)] #[cfg(test)]
test_breakpoint_sdr, test_breakpoint_sdr,
@@ -535,17 +498,17 @@ impl IndexScheduler {
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
DEFAULT_BUDGET DEFAULT_BUDGET
} else { } else {
tracing::debug!("determining budget with dichotomic search"); log::debug!("determining budget with dichotomic search");
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
Self::is_good_heed(tasks_path, map_size) Self::is_good_heed(tasks_path, map_size)
}) })
}; };
tracing::debug!("memmap budget: {budget}B"); log::debug!("memmap budget: {budget}B");
let mut budget = budget / 2; let mut budget = budget / 2;
if task_db_size > (budget / 2) { if task_db_size > (budget / 2) {
task_db_size = clamp_to_page_size(budget * 2 / 5); task_db_size = clamp_to_page_size(budget * 2 / 5);
tracing::debug!( log::debug!(
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space" "Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
); );
} }
@@ -555,13 +518,13 @@ impl IndexScheduler {
let budget = budget; let budget = budget;
let task_db_size = task_db_size; let task_db_size = task_db_size;
tracing::debug!("index budget: {budget}B"); log::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size; let mut index_count = budget / base_map_size;
if index_count < 2 { if index_count < 2 {
// take a bit less than half than the budget to make sure we can always afford to open an index // take a bit less than half than the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5; let map_size = (budget * 2) / 5;
// single index of max budget // single index of max budget
tracing::debug!("1 index of {map_size}B can be opened simultaneously."); log::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size }; return IndexBudget { map_size, index_count: 1, task_db_size };
} }
// give us some space for an additional index when the cache is already full // give us some space for an additional index when the cache is already full
@@ -570,7 +533,7 @@ impl IndexScheduler {
if index_count > max_index_count { if index_count > max_index_count {
index_count = max_index_count; index_count = max_index_count;
} }
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
IndexBudget { map_size: base_map_size, index_count, task_db_size } IndexBudget { map_size: base_map_size, index_count, task_db_size }
} }
@@ -609,46 +572,17 @@ impl IndexScheduler {
run.wake_up.wait(); run.wake_up.wait();
loop { loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() { match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => { Err(e) => {
tracing::error!("{e}"); log::error!("{}", e);
// Wait one second when an irrecoverable error occurs. // Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() { if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1)); std::thread::sleep(Duration::from_secs(1));
} }
} }
} }
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
tracing::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
tracing::error!("{e}");
}
if let Err(e) = file.sync_all() {
tracing::error!("{e}");
}
    // We erase this frame view as it is no longer useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
} }
}) })
.unwrap(); .unwrap();
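Note: one side of this run loop gates puffin profiling on the experimental feature check and, when enabled, flushes one `.puffin` file per scheduler iteration before clearing the frame view. The removed block boils down to a helper like this sketch (errors deliberately stay non-fatal, since profiling must never take the scheduler down; it assumes the `puffin` and `time` crates are available and that scopes were enabled beforehand with `puffin::set_scopes_on(true)`):

```rust
fn export_frame(frame_view: &mut puffin::FrameView) {
    let now = time::OffsetDateTime::now_utc();
    let mut file = match std::fs::File::create(format!("{}.puffin", now)) {
        Ok(file) => file,
        Err(e) => {
            eprintln!("{e}");
            return;
        }
    };
    if let Err(e) = frame_view.save_to_writer(&mut file) {
        eprintln!("{e}"); // keep going even if serialization fails
    }
    if let Err(e) = file.sync_all() {
        eprintln!("{e}");
    }
    // Reset so the next tick only exports frames recorded after this point.
    *frame_view = puffin::FrameView::default();
}
```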
@@ -747,7 +681,9 @@ impl IndexScheduler {
if let Some(canceled_by) = &query.canceled_by { if let Some(canceled_by) = &query.canceled_by {
let mut all_canceled_tasks = RoaringBitmap::new(); let mut all_canceled_tasks = RoaringBitmap::new();
for cancel_task_uid in canceled_by { for cancel_task_uid in canceled_by {
if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? { if let Some(canceled_by_uid) =
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
{
all_canceled_tasks |= canceled_by_uid; all_canceled_tasks |= canceled_by_uid;
} }
} }
@@ -998,7 +934,7 @@ impl IndexScheduler {
        // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task        // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50 && (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
{ {
return Err(Error::NoSpaceLeftInTaskQueue); return Err(Error::NoSpaceLeftInTaskQueue);
} }
@@ -1024,7 +960,7 @@ impl IndexScheduler {
// Get rid of the mutability. // Get rid of the mutability.
let task = task; let task = task;
self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?; self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
for index in task.indexes() { for index in task.indexes() {
self.update_index(&mut wtxn, index, |bitmap| { self.update_index(&mut wtxn, index, |bitmap| {
@@ -1126,6 +1062,8 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start); self.breakpoint(Breakpoint::Start);
} }
puffin::GlobalProfiler::lock().new_frame();
self.cleanup_task_queue()?; self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1161,9 +1099,6 @@ impl IndexScheduler {
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
}; };
// Reset the currently updating index to relinquish the index handle
self.index_mapper.set_currently_updating_index(None);
#[cfg(test)] #[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@@ -1190,19 +1125,18 @@ impl IndexScheduler {
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
} }
tracing::info!("A batch of tasks was successfully completed."); log::info!("A batch of tasks was successfully completed.");
} }
// If we have an abortion error we must stop the tick here and re-schedule tasks. // If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError( Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation, milli::InternalError::AbortedIndexation,
))) ))) => {
| Err(Error::AbortedTask) => {
#[cfg(test)] #[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation); self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort(); wtxn.abort().map_err(Error::HeedTransaction)?;
// We make sure that we don't call `stop_processing` on the `processing_tasks`, // We make sure that we don't call `stop_processing` on the `processing_tasks`,
// this is because we want to let the next tick call `create_next_batch` and keep // this is because we want to let the next tick call `create_next_batch` and keep
@@ -1223,7 +1157,7 @@ impl IndexScheduler {
let index_uid = index_uid.unwrap(); let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen // fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?; self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort(); wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0)); return Ok(TickOutcome::TickAgain(0));
} }
@@ -1247,7 +1181,7 @@ impl IndexScheduler {
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@@ -1255,99 +1189,19 @@ impl IndexScheduler {
} }
} }
let processed = self.processing_tasks.write().unwrap().stop_processing(); self.processing_tasks.write().unwrap().stop_processing();
#[cfg(test)] #[cfg(test)]
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?; self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?; wtxn.commit().map_err(Error::HeedTransaction)?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&processed);
#[cfg(test)] #[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing); self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks)) Ok(TickOutcome::TickAgain(processed_tasks))
} }
    /// Once the task changes have been committed, we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl<'a, 'b> Read for TaskReader<'a, 'b> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let request = ureq::post(url).set("Content-Encoding", "gzip");
let request = match &self.webhook_authorization_header {
Some(header) => request.set("Authorization", header),
None => request,
};
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
}
}
Ok(())
}
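Note: the `notify_webhook` function above streams the updated tasks as gzip-compressed, newline-delimited `TaskView` JSON, with a `Content-Encoding: gzip` header and an optional `Authorization` header. A hedged sketch of how a receiving service could decode such a body, using `flate2` and `serde_json`, with `serde_json::Value` standing in for whatever task representation the receiver actually defines:

```rust
use std::io::{BufRead, BufReader, Read};

use flate2::bufread::GzDecoder;

/// Sketch of a webhook receiver: decompress the gzip body, then parse one JSON
/// task view per line.
fn decode_webhook_body(body: impl Read) -> serde_json::Result<Vec<serde_json::Value>> {
    let reader = BufReader::new(GzDecoder::new(BufReader::new(body)));
    reader
        .lines()
        .filter_map(|line| line.ok())
        .filter(|line| !line.is_empty())
        .map(|line| serde_json::from_str(&line))
        .collect()
}
```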
/// Register a task to cleanup the task queue if needed /// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> { fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1367,12 +1221,12 @@ impl IndexScheduler {
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves. // the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 { if to_delete.len() < 2 {
tracing::warn!("The task queue is almost full, but no task can be deleted yet."); log::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish // the only thing we can do is hope that the user tasks are going to finish
return Ok(()); return Ok(());
} }
tracing::info!( log::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.", "The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len() to_delete.len()
); );
@@ -1405,8 +1259,9 @@ impl IndexScheduler {
Ok(IndexStats { is_indexing, inner_stats: index_stats }) Ok(IndexStats { is_indexing, inner_stats: index_stats })
} }
pub fn features(&self) -> RoFeatures { pub fn features(&self) -> Result<RoFeatures> {
self.features.features() let rtxn = self.read_txn()?;
self.features.features(rtxn)
} }
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> { pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@@ -1422,40 +1277,6 @@ impl IndexScheduler {
} }
} }
// TODO: consider using a type alias or a struct embedder/template
pub fn embedders(
&self,
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs
.into_iter()
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
let prompt =
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt)));
}
}
// add missing embedder
let embedder = Arc::new(
Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(meilisearch_types::milli::Error::from)?,
);
{
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}
Ok((name, (embedder, prompt)))
})
.collect();
res.map(EmbeddingConfigs::new)
}
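Note: the `embedders` function above memoizes embedders per `EmbedderOptions` with a read-then-write locking pattern: the common case only takes the read lock, and the write lock is held just long enough to publish a newly built embedder. A generic sketch of that pattern with stand-in types (`String` for the options, `usize` for the embedder):

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

#[derive(Default)]
struct Cache {
    inner: Arc<RwLock<HashMap<String, Arc<usize>>>>,
}

impl Cache {
    fn get_or_build(&self, key: &str, build: impl FnOnce() -> usize) -> Arc<usize> {
        // Optimistic fast path: the value is usually already cached.
        {
            let cache = self.inner.read().unwrap();
            if let Some(value) = cache.get(key) {
                return value.clone();
            }
        }
        // Slow path: build the value, then publish it under the write lock.
        let value = Arc::new(build());
        self.inner.write().unwrap().insert(key.to_string(), value.clone());
        value
    }
}

fn main() {
    let cache = Cache::default();
    let a = cache.get_or_build("small-model", || 1);
    let b = cache.get_or_build("small-model", || unreachable!("already cached"));
    assert_eq!(a, b);
}
```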
/// Blocks the thread until the test handle asks to progress to/through this breakpoint. /// Blocks the thread until the test handle asks to progress to/through this breakpoint.
/// ///
/// Two messages are sent through the channel for each breakpoint. /// Two messages are sent through the channel for each breakpoint.
@@ -1483,7 +1304,7 @@ impl IndexScheduler {
pub struct Dump<'a> { pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler, index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>, wtxn: RwTxn<'a, 'a>,
indexes: HashMap<String, RoaringBitmap>, indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>, statuses: HashMap<Status, RoaringBitmap>,
@@ -1598,7 +1419,7 @@ impl<'a> Dump<'a> {
}, },
}; };
self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
for index in task.indexes() { for index in task.indexes() {
match self.indexes.get_mut(index) { match self.indexes.get_mut(index) {
@@ -1640,8 +1461,8 @@ impl<'a> Dump<'a> {
} }
} }
self.statuses.entry(task.status).or_default().insert(task.uid); self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
Ok(task) Ok(task)
} }
@@ -1761,8 +1582,6 @@ mod tests {
indexes_path: tempdir.path().join("indexes"), indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"), snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"), dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
webhook_authorization_header: None,
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false, enable_mdb_writemap: false,
@@ -1771,7 +1590,6 @@ mod tests {
indexer_config, indexer_config,
autobatching_enabled: true, autobatching_enabled: true,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: usize::MAX,
instance_features: Default::default(), instance_features: Default::default(),
}; };
configuration(&mut options); configuration(&mut options);
@@ -2244,7 +2062,10 @@ mod tests {
.unwrap(); .unwrap();
index_scheduler.assert_internally_consistent(); index_scheduler.assert_internally_consistent();
} }
handle.advance_one_successful_batch(); for _ in 0..2 {
handle.advance_one_successful_batch();
index_scheduler.assert_internally_consistent();
}
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
} }
@@ -4469,26 +4290,4 @@ mod tests {
} }
"###); "###);
} }
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
} }


@@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@@ -34,10 +34,12 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [3,] [timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Started At: ### Started At:
[timestamp] [2,3,] [timestamp] [2,]
[timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Finished At: ### Finished At:
[timestamp] [2,3,] [timestamp] [2,]
[timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### File Store: ### File Store:
00000000-0000-0000-0000-000000000001 00000000-0000-0000-0000-000000000001


@@ -3,9 +3,9 @@
use std::collections::{BTreeSet, HashSet}; use std::collections::{BTreeSet, HashSet};
use std::ops::Bound; use std::ops::Bound;
use meilisearch_types::heed::types::DecodeIgnore; use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::{Database, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -18,7 +18,7 @@ impl IndexScheduler {
} }
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> { pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1)) Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
} }
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> { pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@@ -26,7 +26,7 @@ impl IndexScheduler {
} }
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> { pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &task_id)?) Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
} }
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@@ -88,7 +88,7 @@ impl IndexScheduler {
} }
} }
self.all_tasks.put(wtxn, &task.uid, task)?; self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
Ok(()) Ok(())
} }
@@ -169,11 +169,11 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime( pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>, database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime, time: OffsetDateTime,
task_id: TaskId, task_id: TaskId,
) -> Result<()> { ) -> Result<()> {
let timestamp = time.unix_timestamp_nanos(); let timestamp = BEI128::new(time.unix_timestamp_nanos());
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default(); let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id); task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?; database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime( pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>, database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime, time: OffsetDateTime,
task_id: TaskId, task_id: TaskId,
) -> Result<()> { ) -> Result<()> {
let timestamp = time.unix_timestamp_nanos(); let timestamp = BEI128::new(time.unix_timestamp_nanos());
if let Some(mut existing) = database.get(wtxn, &timestamp)? { if let Some(mut existing) = database.get(wtxn, &timestamp)? {
existing.remove(task_id); existing.remove(task_id);
if existing.is_empty() { if existing.is_empty() {
@@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes( pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn, rtxn: &RoTxn,
tasks: &mut RoaringBitmap, tasks: &mut RoaringBitmap,
database: Database<BEI128, CboRoaringBitmapCodec>, database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>, after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>, before: Option<OffsetDateTime>,
) -> Result<()> { ) -> Result<()> {
@@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)), (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
}; };
let mut collected_task_ids = RoaringBitmap::new(); let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| b.unix_timestamp_nanos()); let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| b.unix_timestamp_nanos()); let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
let iter = database.range(rtxn, &(start, end))?; let iter = database.range(rtxn, &(start, end))?;
for r in iter { for r in iter {
let (_timestamp, task_ids) = r?; let (_timestamp, task_ids) = r?;
@@ -337,6 +337,8 @@ impl IndexScheduler {
let rtxn = self.env.read_txn().unwrap(); let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() { for task in self.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap(); let (task_id, task) = task.unwrap();
let task_id = task_id.get();
let task_index_uid = task.index_uid().map(ToOwned::to_owned); let task_index_uid = task.index_uid().map(ToOwned::to_owned);
let Task { let Task {
@@ -359,13 +361,16 @@ impl IndexScheduler {
.unwrap() .unwrap()
.contains(task.uid)); .contains(task.uid));
} }
let db_enqueued_at = let db_enqueued_at = self
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap(); .enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id)); assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at { if let Some(started_at) = started_at {
let db_started_at = self let db_started_at = self
.started_at .started_at
.get(&rtxn, &started_at.unix_timestamp_nanos()) .get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert!(db_started_at.contains(task_id)); assert!(db_started_at.contains(task_id));
@@ -373,7 +378,7 @@ impl IndexScheduler {
if let Some(finished_at) = finished_at { if let Some(finished_at) = finished_at {
let db_finished_at = self let db_finished_at = self
.finished_at .finished_at
.get(&rtxn, &finished_at.unix_timestamp_nanos()) .get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert!(db_finished_at.contains(task_id)); assert!(db_finished_at.contains(task_id));
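Note: the datetime indexes in this file key their bitmaps by `unix_timestamp_nanos()` stored big-endian, so for post-epoch timestamps LMDB's byte-wise key order matches chronological order and `keep_tasks_within_datetimes` can do a plain range scan. A small sketch of how the range bounds are derived, simplified from the `map_bound` calls above:

```rust
use std::ops::Bound;

use time::OffsetDateTime;

/// Sketch: turn optional `after`/`before` datetimes into the exclusive i128
/// nanosecond bounds used for the LMDB range query.
fn nanos_bounds(
    after: Option<OffsetDateTime>,
    before: Option<OffsetDateTime>,
) -> (Bound<i128>, Bound<i128>) {
    let map = |b: Option<OffsetDateTime>| match b {
        Some(t) => Bound::Excluded(t.unix_timestamp_nanos()),
        None => Bound::Unbounded,
    };
    (map(after), map(before))
}
```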


@@ -1,7 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode}; use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid; use uuid::Uuid;
/// A heed codec for value of struct Uuid. /// A heed codec for value of struct Uuid.
@@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec { impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid; type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into) bytes.try_into().ok().map(Uuid::from_bytes)
} }
} }
impl BytesEncode<'_> for UuidCodec { impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid; type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Ok(Cow::Borrowed(item.as_bytes())) Some(Cow::Borrowed(item.as_bytes()))
} }
} }
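Note: the codec above reflects the newer heed trait shape, where `bytes_encode` and `bytes_decode` return `Result<_, BoxedError>` instead of `Option`, so decoding failures carry a real cause. A short usage sketch, assuming the `UuidCodec` above is in scope:

```rust
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;

// Round-trip a UUID through the codec; any failure propagates with `?`
// instead of being flattened into `None`.
fn roundtrip(uuid: Uuid) -> Result<Uuid, BoxedError> {
    let bytes = UuidCodec::bytes_encode(&uuid)?;
    UuidCodec::bytes_decode(&bytes)
}
```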


@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] } insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0" md5 = "0.7.0"
once_cell = "1.19" once_cell = "1.17"


@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
base64 = "0.21.7" base64 = "0.21.0"
enum-iterator = "1.5.0" enum-iterator = "1.4.0"
hmac = "0.12.1" hmac = "0.12.1"
maplit = "1.0.2" maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5" rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] } roaring = { path = "../../roaring-rs", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.6"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }


@@ -4,20 +4,17 @@ use std::collections::HashSet;
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all; use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
use thiserror::Error;
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::fmt::Hyphenated; use uuid::fmt::Hyphenated;
use uuid::Uuid; use uuid::Uuid;
@@ -33,7 +30,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Arc<Env>, env: Arc<Env>,
keys: Database<Bytes, SerdeJson<Key>>, keys: Database<ByteSlice, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool, should_close_on_drop: bool,
} }
@@ -279,7 +276,7 @@ impl HeedAuthStore {
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> { fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self let mut iter = self
.action_keyid_index_expiration .action_keyid_index_expiration
.remap_types::<Bytes, DecodeIgnore>() .remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter_mut(wtxn, key.as_bytes())?; .prefix_iter_mut(wtxn, key.as_bytes())?;
while iter.next().transpose()?.is_some() { while iter.next().transpose()?.is_some() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
@@ -297,24 +294,23 @@ pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?; let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
let (&action_byte, index) = let (action_bytes, index) = match try_split_array_at(action_bytes)? {
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? { (action, []) => (action, None),
([action], []) => (action, None), (action, index) => (action, Some(index)),
([action], index) => (action, Some(index)), };
};
let key_id = Uuid::from_bytes(*key_id_bytes); let key_id = Uuid::from_bytes(*key_id_bytes);
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?; let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
Ok((key_id, action, index)) Some((key_id, action, index))
} }
} }
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> { fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes()); bytes.extend_from_slice(key_id.as_bytes());
@@ -324,20 +320,10 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
bytes.extend_from_slice(index); bytes.extend_from_slice(index);
} }
Ok(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
#[derive(Error, Debug)]
#[error("cannot construct a valid Action from {action_byte}")]
pub struct InvalidActionError {
pub action_byte: u8,
}
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String { pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
// format uid as hyphenated allowing user to generate their own keys. // format uid as hyphenated allowing user to generate their own keys.
let mut uid_buffer = [0; Hyphenated::LENGTH]; let mut uid_buffer = [0; Hyphenated::LENGTH];
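Note: the encoder above lays the key out as `[16-byte key uuid][1-byte action discriminant][optional index bytes]`, which is why decoding first splits off 16 bytes, then one byte, and treats an empty remainder as the absence of an index filter. A hedged sketch of that layout, assuming the action is written as its one-byte `repr`:

```rust
// Stand-alone illustration of the byte layout; `action` here is the raw u8
// discriminant rather than the real `Action` enum.
fn encode(key_id: &uuid::Uuid, action: u8, index: Option<&[u8]>) -> Vec<u8> {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(key_id.as_bytes()); // 16 bytes
    bytes.push(action);                         // 1 byte
    if let Some(index) = index {
        bytes.extend_from_slice(index);         // everything left over
    }
    bytes
}
```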


@@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.4.1", default-features = false } actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.79" anyhow = "1.0.70"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.2.1"
deserr = { version = "0.6.1", features = ["actix-web"] } deserr = { version = "0.6.0", features = ["actix-web"]}
either = { version = "1.9.0", features = ["serde"] } either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.5.0" enum-iterator = "1.4.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.25"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.7.1" memmap2 = "0.7.1"
milli = { path = "../milli" } milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] } roaring = { path = "../../roaring-rs", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
serde_json = "1.0.111" serde_json = "1.0.95"
tar = "0.4.40" tar = "0.4.38"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35" tokio = "1.27"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
insta = "1.34.0" insta = "1.29.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
[features] [features]
@@ -50,9 +50,6 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"] japanese = ["milli/japanese"]
# thai specialized tokenization # thai specialized tokenization
thai = ["milli/thai"] thai = ["milli/thai"]
# allow greek specialized tokenization # allow greek specialized tokenization
greek = ["milli/greek"] greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["milli/vietnamese"]


@@ -188,4 +188,3 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
merge_with_error_impl_take_error_message!(ParseTaskKindError); merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError); merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);


@@ -222,8 +222,6 @@ InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidEmbedder , InvalidRequest , BAD_REQUEST ;
InvalidHybridQuery , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
@@ -235,7 +233,6 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ; InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
@@ -255,11 +252,9 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
@@ -299,20 +294,15 @@ MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ; MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ; MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ; MissingPayload , InvalidRequest , BAD_REQUEST ;
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ; MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ; MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY; NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ; PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
TaskNotFound , InvalidRequest , NOT_FOUND ; TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ; TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
} }
impl ErrorCode for JoinError { impl ErrorCode for JoinError {
@@ -334,6 +324,7 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions | UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile, UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
@@ -345,16 +336,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId Code::InvalidDocumentId
} }
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
UserError::MultiplePrimaryKeyCandidatesFound { .. } => { UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
Code::IndexPrimaryKeyMultipleCandidatesFound Code::IndexPrimaryKeyMultipleCandidatesFound
@@ -372,15 +353,11 @@ impl ErrorCode for milli::Error {
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType, UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort, UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => { UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance Code::InvalidSettingsTypoTolerance
} }
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
} }
} }
} }
@@ -410,11 +387,11 @@ impl ErrorCode for HeedError {
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile, HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
HeedError::Io(e) => e.error_code(), HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding(_) | HeedError::Encoding
| HeedError::Decoding(_) | HeedError::Decoding
| HeedError::InvalidDatabaseTyping | HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing | HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal, | HeedError::BadOpenOptions => Code::Internal,
} }
} }
} }
@@ -468,15 +445,6 @@ impl fmt::Display for DeserrParseIntError {
} }
} }
impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`."
)
}
}
#[macro_export] #[macro_export]
macro_rules! internal_error { macro_rules! internal_error {
($target:ty : $($other:path), *) => { ($target:ty : $($other:path), *) => {


@@ -3,14 +3,11 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)] #[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures { pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool, pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures { pub struct InstanceTogglableFeatures {
pub metrics: bool, pub metrics: bool,
pub logs_route: bool,
} }
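Side note on RuntimeTogglableFeatures: the `rename_all = "camelCase"` and `default` container attributes shown on both sides mean a partial JSON payload deserializes with every missing flag set to false. A hedged two-field sketch of that behaviour (field list trimmed down for illustration):

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)]
struct RuntimeTogglableFeatures {
    vector_store: bool,
    score_details: bool,
}

fn main() {
    // Only one flag is provided; the other falls back to Default::default(), i.e. false.
    let f: RuntimeTogglableFeatures = serde_json::from_str(r#"{"vectorStore": true}"#).unwrap();
    assert!(f.vector_store && !f.score_details);
}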


@@ -9,7 +9,6 @@ pub mod index_uid_pattern;
pub mod keys; pub mod keys;
pub mod settings; pub mod settings;
pub mod star_or; pub mod star_or;
pub mod task_view;
pub mod tasks; pub mod tasks;
pub mod versioning; pub mod versioning;
pub use milli::{heed, Index}; pub use milli::{heed, Index};


@@ -8,7 +8,6 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer; use fst::IntoStreamer;
use milli::proximity::ProximityPrecision;
use milli::update::Setting; use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer}; use serde::{Deserialize, Serialize, Serializer};
@@ -187,9 +186,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)] #[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
pub distinct_attribute: Setting<String>, pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
pub proximity_precision: Setting<ProximityPrecisionView>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)] #[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub typo_tolerance: Setting<TypoSettings>, pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -199,10 +195,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)] #[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
pub pagination: Setting<PaginationSettings>, pub pagination: Setting<PaginationSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
#[serde(skip)] #[serde(skip)]
#[deserr(skip)] #[deserr(skip)]
pub _kind: PhantomData<T>, pub _kind: PhantomData<T>,
@@ -222,11 +214,9 @@ impl Settings<Checked> {
separator_tokens: Setting::Reset, separator_tokens: Setting::Reset,
dictionary: Setting::Reset, dictionary: Setting::Reset,
distinct_attribute: Setting::Reset, distinct_attribute: Setting::Reset,
proximity_precision: Setting::Reset,
typo_tolerance: Setting::Reset, typo_tolerance: Setting::Reset,
faceting: Setting::Reset, faceting: Setting::Reset,
pagination: Setting::Reset, pagination: Setting::Reset,
embedders: Setting::Reset,
_kind: PhantomData, _kind: PhantomData,
} }
} }
@@ -244,11 +234,9 @@ impl Settings<Checked> {
dictionary, dictionary,
synonyms, synonyms,
distinct_attribute, distinct_attribute,
proximity_precision,
typo_tolerance, typo_tolerance,
faceting, faceting,
pagination, pagination,
embedders,
.. ..
} = self; } = self;
@@ -264,11 +252,9 @@ impl Settings<Checked> {
dictionary, dictionary,
synonyms, synonyms,
distinct_attribute, distinct_attribute,
proximity_precision,
typo_tolerance, typo_tolerance,
faceting, faceting,
pagination, pagination,
embedders,
_kind: PhantomData, _kind: PhantomData,
} }
} }
@@ -310,29 +296,12 @@ impl Settings<Unchecked> {
separator_tokens: self.separator_tokens, separator_tokens: self.separator_tokens,
dictionary: self.dictionary, dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute, distinct_attribute: self.distinct_attribute,
proximity_precision: self.proximity_precision,
typo_tolerance: self.typo_tolerance, typo_tolerance: self.typo_tolerance,
faceting: self.faceting, faceting: self.faceting,
pagination: self.pagination, pagination: self.pagination,
embedders: self.embedders,
_kind: PhantomData, _kind: PhantomData,
} }
} }
pub fn validate(self) -> Result<Self, milli::Error> {
self.validate_embedding_settings()
}
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
let config_to_check = std::mem::take(config);
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
*config = checked_config
}
self.embedders = Setting::Set(configs);
Ok(self)
}
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -421,12 +390,6 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (), Setting::NotSet => (),
} }
match settings.proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match settings.typo_tolerance { match settings.typo_tolerance {
Setting::Set(ref value) => { Setting::Set(ref value) => {
match value.enabled { match value.enabled {
@@ -513,12 +476,6 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_pagination_max_total_hits(), Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (), Setting::NotSet => (),
} }
match settings.embedders.clone() {
Setting::Set(value) => builder.set_embedder_settings(value),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
} }
pub fn settings( pub fn settings(
@@ -552,8 +509,6 @@ pub fn settings(
let distinct_field = index.distinct_field(rtxn)?.map(String::from); let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
let synonyms = index.user_defined_synonyms(rtxn)?; let synonyms = index.user_defined_synonyms(rtxn)?;
let min_typo_word_len = MinWordSizeTyposSetting { let min_typo_word_len = MinWordSizeTyposSetting {
@@ -577,10 +532,7 @@ pub fn settings(
let faceting = FacetingSettings { let faceting = FacetingSettings {
max_values_per_facet: Setting::Set( max_values_per_facet: Setting::Set(
index index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
), ),
sort_facet_values_by: Setting::Set( sort_facet_values_by: Setting::Set(
index index
@@ -593,20 +545,10 @@ pub fn settings(
let pagination = PaginationSettings { let pagination = PaginationSettings {
max_total_hits: Setting::Set( max_total_hits: Setting::Set(
index index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
), ),
}; };
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|(name, config)| (name, Setting::Set(config.into())))
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
Ok(Settings { Ok(Settings {
displayed_attributes: match displayed_attributes { displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs), Some(attrs) => Setting::Set(attrs),
@@ -627,12 +569,10 @@ pub fn settings(
Some(field) => Setting::Set(field), Some(field) => Setting::Set(field),
None => Setting::Reset, None => Setting::Reset,
}, },
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
synonyms: Setting::Set(synonyms), synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance), typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting), faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination), pagination: Setting::Set(pagination),
embedders,
_kind: PhantomData, _kind: PhantomData,
}) })
} }
@@ -733,32 +673,6 @@ impl From<RankingRuleView> for Criterion {
} }
} }
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
#[default]
ByWord,
ByAttribute,
}
impl From<ProximityPrecision> for ProximityPrecisionView {
fn from(value: ProximityPrecision) -> Self {
match value {
ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
}
}
}
impl From<ProximityPrecisionView> for ProximityPrecision {
fn from(value: ProximityPrecisionView) -> Self {
match value {
ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
}
}
}
#[cfg(test)] #[cfg(test)]
pub(crate) mod test { pub(crate) mod test {
use super::*; use super::*;
@@ -778,11 +692,9 @@ pub(crate) mod test {
dictionary: Setting::NotSet, dictionary: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet, typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet, faceting: Setting::NotSet,
pagination: Setting::NotSet, pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: PhantomData::<Unchecked>, _kind: PhantomData::<Unchecked>,
}; };
@@ -804,11 +716,9 @@ pub(crate) mod test {
dictionary: Setting::NotSet, dictionary: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet, typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet, faceting: Setting::NotSet,
pagination: Setting::NotSet, pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: PhantomData::<Unchecked>, _kind: PhantomData::<Unchecked>,
}; };
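Two details in the settings diff above are easy to miss. First, validate_embedding_settings runs once per named embedder before a settings update is accepted. Second, the camelCase rename on ProximityPrecisionView means the variants travel over the wire as "byWord" and "byAttribute". A small standalone sketch of that second point only (serde round-trip, not tied to milli or deserr):

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
enum ProximityPrecisionView {
    #[default]
    ByWord,
    ByAttribute,
}

fn main() {
    assert_eq!(serde_json::to_string(&ProximityPrecisionView::ByWord).unwrap(), r#""byWord""#);
    let v: ProximityPrecisionView = serde_json::from_str(r#""byAttribute""#).unwrap();
    assert_eq!(v, ProximityPrecisionView::ByAttribute);
}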


@@ -1,139 +0,0 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
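One pattern in the DetailsView struct above is worth calling out: fields typed Option<Option<T>> combined with skip_serializing_if = "Option::is_none". The outer Option decides whether the key appears in the JSON at all, the inner one whether its value is null. A minimal sketch of just that behaviour, using the dump_uid field as the example and nothing else from the struct:

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Sketch {
    #[serde(skip_serializing_if = "Option::is_none")]
    dump_uid: Option<Option<String>>,
}

fn main() {
    // Outer None: the key is omitted entirely.
    assert_eq!(serde_json::to_string(&Sketch { dump_uid: None }).unwrap(), "{}");
    // Some(None): the key is present with an explicit null, as Details::Dump can produce.
    assert_eq!(
        serde_json::to_string(&Sketch { dump_uid: Some(None) }).unwrap(),
        r#"{"dumpUid":null}"#
    );
}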


@@ -13,14 +13,14 @@ license.workspace = true
default-run = "meilisearch" default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.6.4"
actix-http = { version = "3.5.1", default-features = false, features = [ actix-http = { version = "3.3.1", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls", "rustls",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.4.1", default-features = false, features = [ actix-web = { version = "4.3.1", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
@@ -28,115 +28,114 @@ actix-web = { version = "4.4.1", default-features = false, features = [
"rustls", "rustls",
] } ] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] } anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5" async-stream = "0.3.5"
async-trait = "0.1.77" async-trait = "0.1.68"
bstr = "1.9.0" bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = [ byte-unit = { version = "4.0.19", default-features = false, features = [
"std", "std",
"serde", "serde",
] } ] }
bytes = "1.5.0" bytes = "1.4.0"
clap = { version = "4.4.17", features = ["derive", "env"] } clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.11" crossbeam-channel = "0.5.8"
deserr = { version = "0.6.1", features = ["actix-web"] } deserr = { version = "0.6.0", features = ["actix-web"]}
dump = { path = "../dump" } dump = { path = "../dump" }
either = "1.9.0" either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.25"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3.30" futures = "0.3.28"
futures-util = "0.3.30" futures-util = "0.3.28"
http = "0.2.11" http = "0.2.9"
index-scheduler = { path = "../index-scheduler" } index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] } indexmap = { version = "2.0.0", features = ["serde"] }
is-terminal = "0.4.10" is-terminal = "0.4.8"
itertools = "0.11.0" itertools = "0.11.0"
jsonwebtoken = "8.3.0" jsonwebtoken = "8.3.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false } mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17" mime = "0.3.17"
num_cpus = "1.16.0" num_cpus = "1.15.0"
obkv = "0.2.1" obkv = "0.2.0"
once_cell = "1.19.0" once_cell = "1.17.1"
ordered-float = "4.2.0" ordered-float = "3.7.0"
parking_lot = "0.12.1" parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" } permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13" pin-project-lite = "0.2.9"
platform-dirs = "0.3.0" platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] } puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5" rand = "0.8.5"
rayon = "1.8.0" rayon = "1.7.0"
regex = "1.10.2" regex = "1.7.3"
reqwest = { version = "0.11.23", features = [ reqwest = { version = "0.11.16", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = "0.20.8" rustls = "0.20.8"
rustls-pemfile = "1.0.2" rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true } segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.6"
siphasher = "1.0.0" siphasher = "0.3.10"
slice-group-by = "0.3.1" slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true } static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5" sysinfo = "0.29.7"
tar = "0.4.40" tar = "0.4.38"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = [ time = { version = "0.3.20", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = { version = "1.35.1", features = ["full"] } tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.14" tokio-stream = "0.1.12"
toml = "0.8.8" toml = "0.7.3"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.4.0" walkdir = "2.3.3"
yaup = "0.2.1" yaup = "0.2.1"
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
[dev-dependencies] [dev-dependencies]
actix-rt = "2.9.0" actix-rt = "2.8.0"
assert-json-diff = "2.0.2" assert-json-diff = "2.0.2"
brotli = "3.4.0" brotli = "3.3.4"
insta = "1.34.0" insta = "1.29.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.16"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6" temp-env = "0.3.3"
urlencoding = "2.1.3" urlencoding = "2.1.2"
yaup = "0.2.1" yaup = "0.2.1"
[build-dependencies] [build-dependencies]
anyhow = { version = "1.0.79", optional = true } anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.18.0", optional = true } cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true } hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [ reqwest = { version = "0.11.16", features = [
"blocking", "blocking",
"rustls-tls", "rustls-tls",
], default-features = false, optional = true } ], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true } static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true } tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] } vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.6", optional = true } zip = { version = "0.6.4", optional = true }
[features] [features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"] analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [ mini-dashboard = [
"actix-web-static-files", "actix-web-static-files",
"static-files", "static-files",
@@ -153,9 +152,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"] japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"] thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"] greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff" sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"


@@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient}; use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize; use serde::Serialize;
use serde_json::{json, Value}; use serde_json::{json, Value};
use sysinfo::{Disks, System}; use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime; use time::OffsetDateTime;
use tokio::select; use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::mpsc::{self, Receiver, Sender};
@@ -28,9 +28,7 @@ use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
}; };
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::option::{ use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
};
use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery; use crate::routes::tasks::TasksFilterQuery;
@@ -38,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
use crate::search::{ use crate::search::{
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
}; };
use crate::Opt; use crate::Opt;
@@ -252,10 +250,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos { struct Infos {
env: String, env: String,
experimental_enable_metrics: bool, experimental_enable_metrics: bool,
experimental_logs_mode: LogMode,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool, experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
db_path: bool, db_path: bool,
import_dump: bool, import_dump: bool,
dump_dir: bool, dump_dir: bool,
@@ -268,8 +263,6 @@ struct Infos {
ignore_snapshot_if_db_exists: bool, ignore_snapshot_if_db_exists: bool,
http_addr: bool, http_addr: bool,
http_payload_size_limit: Byte, http_payload_size_limit: Byte,
task_queue_webhook: bool,
task_webhook_authorization_header: bool,
log_level: String, log_level: String,
max_indexing_memory: MaxMemory, max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads, max_indexing_threads: MaxThreads,
@@ -291,15 +284,10 @@ impl From<Opt> for Infos {
let Opt { let Opt {
db_path, db_path,
experimental_enable_metrics, experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr, http_addr,
master_key: _, master_key: _,
env, env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _, max_index_size: _,
max_task_db_size: _, max_task_db_size: _,
http_payload_size_limit, http_payload_size_limit,
@@ -339,8 +327,6 @@ impl From<Opt> for Infos {
Self { Self {
env, env,
experimental_enable_metrics, experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),
@@ -354,9 +340,6 @@ impl From<Opt> for Infos {
ignore_snapshot_if_db_exists, ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(), http_addr: http_addr != default_http_addr(),
http_payload_size_limit, http_payload_size_limit,
experimental_max_number_of_batched_tasks,
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(), log_level: log_level.to_string(),
max_indexing_memory, max_indexing_memory,
max_indexing_threads, max_indexing_threads,
@@ -394,17 +377,16 @@ impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value { fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now); static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| { static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all(); let mut sys = System::new_all();
sys.refresh_all(); sys.refresh_all();
let kernel_version = System::kernel_version() let kernel_version =
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string())); sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({ json!({
"distribution": System::name(), "distribution": sys.name(),
"kernel_version": kernel_version, "kernel_version": kernel_version,
"cores": sys.cpus().len(), "cores": sys.cpus().len(),
"ram_size": sys.total_memory(), "ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(), "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
}) })
}); });
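For readers not following the sysinfo upgrade: the side of this hunk that calls Disks::new_with_refreshed_list corresponds to the 0.30 API pinned in the Cargo.toml above (sysinfo 0.30.5), where Disks is its own type and name()/kernel_version() become associated functions on System instead of instance methods behind SystemExt/DiskExt. Pulled out of the analytics code, the same calls as a standalone sketch:

use sysinfo::{Disks, System};

fn system_traits() -> serde_json::Value {
    let disks = Disks::new_with_refreshed_list();
    let mut sys = System::new_all();
    sys.refresh_all();
    // Keep only the kernel version prefix before the first '-'.
    let kernel_version =
        System::kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
    serde_json::json!({
        "distribution": System::name(),
        "kernel_version": kernel_version,
        "cores": sys.cpus().len(),
        "ram_size": sys.total_memory(),
        "disk_size": disks.iter().map(|disk| disk.total_space()).max(),
    })
}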
@@ -601,11 +583,6 @@ pub struct SearchAggregator {
// vector // vector
// The maximum number of floats in a vector request // The maximum number of floats in a vector request
max_vector_size: usize, max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
// every time a search is done, we increment the counter linked to the used settings // every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>, matching_strategy: HashMap<String, usize>,
@@ -659,7 +636,6 @@ impl SearchAggregator {
crop_marker, crop_marker,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid,
} = query; } = query;
let mut ret = Self::default(); let mut ret = Self::default();
@@ -733,12 +709,6 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score; ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details; ret.show_ranking_score_details = *show_ranking_score_details;
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true;
}
ret ret
} }
@@ -792,9 +762,6 @@ impl SearchAggregator {
facets_total_number_of_facets, facets_total_number_of_facets,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
} = other; } = other;
if self.timestamp.is_none() { if self.timestamp.is_none() {
@@ -840,9 +807,6 @@ impl SearchAggregator {
// vector // vector
self.max_vector_size = self.max_vector_size.max(max_vector_size); self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination // pagination
self.max_limit = self.max_limit.max(max_limit); self.max_limit = self.max_limit.max(max_limit);
@@ -911,9 +875,6 @@ impl SearchAggregator {
facets_total_number_of_facets, facets_total_number_of_facets,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
} = self; } = self;
if total_received == 0 { if total_received == 0 {
@@ -953,11 +914,6 @@ impl SearchAggregator {
"vector": { "vector": {
"max_vector_size": max_vector_size, "max_vector_size": max_vector_size,
}, },
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"embedder": embedder,
},
"pagination": { "pagination": {
"max_limit": max_limit, "max_limit": max_limit,
"max_offset": max_offset, "max_offset": max_offset,
@@ -1053,7 +1009,6 @@ impl MultiSearchAggregator {
crop_marker: _, crop_marker: _,
matching_strategy: _, matching_strategy: _,
attributes_to_search_on: _, attributes_to_search_on: _,
hybrid: _,
} = query; } = query;
index_uid.as_str() index_uid.as_str()
@@ -1200,7 +1155,6 @@ impl FacetSearchAggregator {
filter, filter,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid,
} = query; } = query;
let mut ret = Self::default(); let mut ret = Self::default();
@@ -1214,8 +1168,7 @@ impl FacetSearchAggregator {
|| vector.is_some() || vector.is_some()
|| filter.is_some() || filter.is_some()
|| *matching_strategy != MatchingStrategy::default() || *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some() || attributes_to_search_on.is_some();
|| hybrid.is_some();
ret ret
} }


@@ -12,8 +12,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))] .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>), MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String), CsvDelimiterWithWrongContentType(String),
#[error( #[error(
@@ -53,15 +51,12 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError), DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)] #[error(transparent)]
Join(#[from] JoinError), Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
MissingSearchHybrid,
} }
impl ErrorCode for MeilisearchHttpError { impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
@@ -79,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::FileStore(_) => Code::Internal, MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(), MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal, MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
} }
} }
} }


@@ -29,6 +29,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@@ -38,8 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt; pub use option::Opt;
use option::ScheduleSnapshot; use option::ScheduleSnapshot;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@@ -87,35 +86,10 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
} }
} }
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
tracing_subscriber::Registry,
>;
pub type LogStderrHandle =
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
pub type LogStderrType = tracing_subscriber::filter::Filtered<
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
Targets,
SubscriberForSecondLayer,
>;
pub fn create_app( pub fn create_app(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>, auth_controller: Data<AuthController>,
opt: Opt, opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
enable_dashboard: bool, enable_dashboard: bool,
) -> actix_web::App< ) -> actix_web::App<
@@ -134,14 +108,16 @@ pub fn create_app(
index_scheduler.clone(), index_scheduler.clone(),
auth_controller.clone(), auth_controller.clone(),
&opt, &opt,
logs,
analytics.clone(), analytics.clone(),
) )
}) })
.configure(routes::configure) .configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard)); .configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(middleware::RouteMetrics); let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
app.wrap( app.wrap(
Cors::default() Cors::default()
.send_wildcard() .send_wildcard()
@@ -150,49 +126,11 @@ pub fn create_app(
.allow_any_method() .allow_any_method()
.max_age(86_400), // 24h .max_age(86_400), // 24h
) )
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new()) .wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
} }
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure { enum OnFailure {
RemoveDb, RemoveDb,
KeepDb, KeepDb,
@@ -293,15 +231,12 @@ fn open_or_create_database_unchecked(
indexes_path: opt.db_path.join("indexes"), indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(), snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(), dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize, task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?, indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true, autobatching_enabled: true,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT, index_count: DEFAULT_INDEX_COUNT,
instance_features, instance_features,
@@ -345,15 +280,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?; let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() { if let Some(date) = dump_reader.date() {
tracing::info!( log::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version "Importing a dump of meilisearch `{:?}` from the {}",
%date, dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch" date
); );
} else { } else {
tracing::info!( log::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version "Importing a dump of meilisearch `{:?}`",
"Importing a dump of meilisearch", dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
); );
} }
@@ -387,7 +322,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? { for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?; let mut index_reader = index_reader?;
let metadata = index_reader.metadata(); let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid); log::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at)); let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?; let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -401,15 +336,14 @@ fn import_dump(
} }
// 4.2 Import the settings. // 4.2 Import the settings.
tracing::info!("Importing the settings."); log::info!("Importing the settings.");
let settings = index_reader.settings()?; let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder); apply_settings_to_builder(&settings, &mut builder);
builder builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents. // 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents."); log::info!("Importing the documents.");
let file = tempfile::tempfile()?; let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? { for document in index_reader.documents()? {
@@ -431,16 +365,15 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments, update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default() ..Default::default()
}, },
|indexing_step| tracing::trace!("update: {:?}", indexing_step), |indexing_step| log::debug!("update: {:?}", indexing_step),
|| false, || false,
)?; )?;
let (builder, user_result) = builder.add_documents(reader)?; let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?; log::info!("{} documents found.", user_result?);
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?; builder.execute()?;
wtxn.commit()?; wtxn.commit()?;
tracing::info!("All documents successfully imported."); log::info!("All documents successfully imported.");
} }
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@@ -458,7 +391,6 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>, auth: Data<AuthController>,
opt: &Opt, opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) { ) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@@ -466,11 +398,8 @@ pub fn configure_data(
.app_data(index_scheduler) .app_data(index_scheduler)
.app_data(auth) .app_data(auth)
.app_data(web::Data::from(analytics)) .app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data( .app_data(
web::JsonConfig::default() web::JsonConfig::default()
.limit(http_payload_size_limit)
.content_type(|mime| mime == mime::APPLICATION_JSON) .content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err { .error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) { JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {


@@ -1,7 +1,6 @@
use std::env; use std::env;
use std::io::{stderr, LineWriter, Write}; use std::io::{stderr, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use actix_web::http::KeepAlive; use actix_web::http::KeepAlive;
@@ -10,78 +9,37 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal; use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics; use meilisearch::analytics::Analytics;
-use meilisearch::option::LogMode;
-use meilisearch::{
-    analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
-    LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
-};
+use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator] #[global_allocator]
-static ALLOC: MiMalloc = MiMalloc;
+static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
fn default_log_stderr_layer(opt: &Opt) -> LogStderrType {
let layer = tracing_subscriber::fmt::layer()
.with_writer(|| LineWriter::new(std::io::stderr()))
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let layer = match opt.experimental_logs_mode {
LogMode::Human => Box::new(layer)
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
LogMode::Json => Box::new(layer.json())
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
};
layer.with_filter(
tracing_subscriber::filter::Targets::new()
.with_target("", LevelFilter::from_str(&opt.log_level.to_string()).unwrap()),
)
}
/// does all the setup before meilisearch is launched /// does all the setup before meilisearch is launched
-fn setup(opt: &Opt) -> anyhow::Result<(LogRouteHandle, LogStderrHandle)> {
-    let (route_layer, route_layer_handle) =
-        tracing_subscriber::reload::Layer::new(default_log_route_layer());
-    let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
-
-    let (stderr_layer, stderr_layer_handle) =
-        tracing_subscriber::reload::Layer::new(default_log_stderr_layer(opt));
-    let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
-
-    let subscriber = tracing_subscriber::registry().with(route_layer).with(stderr_layer);
-
-    // set the subscriber as the default for the application
-    tracing::subscriber::set_global_default(subscriber).unwrap();
-
-    Ok((route_layer_handle, stderr_layer_handle))
-}
-
-fn on_panic(info: &std::panic::PanicInfo) {
-    let info = info.to_string().replace('\n', " ");
-    tracing::error!(%info);
-}
+fn setup(opt: &Opt) -> anyhow::Result<()> {
+    let mut log_builder = env_logger::Builder::new();
+    log_builder.parse_filters(&opt.log_level.to_string());
+
+    log_builder.init();
+
+    Ok(())
+}
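The reload-handle pattern used by the larger `setup` above can be shown in isolation. This is a minimal sketch, not the Meilisearch code itself, assuming `tracing` and `tracing-subscriber` with default features; the handle plays the role of the `LogStderrHandle` that the logs routes later use to change the filter at runtime:

```rust
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::util::SubscriberInitExt as _;
use tracing_subscriber::Layer as _;

fn main() {
    // A stderr layer whose filter can be swapped later through `handle`.
    let stderr_layer = tracing_subscriber::fmt::layer()
        .with_writer(std::io::stderr)
        .with_filter(LevelFilter::INFO);
    let (stderr_layer, handle) = tracing_subscriber::reload::Layer::new(stderr_layer);

    tracing_subscriber::registry().with(stderr_layer).init();

    tracing::debug!("not printed: the filter is still INFO");

    // Later (for instance from an HTTP route) the filter can be relaxed in place.
    handle.modify(|layer| *layer.filter_mut() = LevelFilter::DEBUG).unwrap();
    tracing::debug!("printed: the filter was reloaded to DEBUG");
}
```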
#[actix_web::main] #[actix_web::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?; let (opt, config_read_from) = Opt::try_build()?;
-    std::panic::set_hook(Box::new(on_panic));
+    #[cfg(feature = "profile-with-puffin")]
+    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
+    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!( anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
); );
-    let log_handle = setup(&opt)?;
+    setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) { match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => { ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@@ -119,7 +77,7 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from); print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; run_http(index_scheduler, auth_controller, opt, analytics).await?;
Ok(()) Ok(())
} }
@@ -128,7 +86,6 @@ async fn run_http(
index_scheduler: Arc<IndexScheduler>, index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>, auth_controller: Arc<AuthController>,
opt: Opt, opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development"; let enable_dashboard = &opt.env == "development";
@@ -141,7 +98,6 @@ async fn run_http(
index_scheduler.clone(), index_scheduler.clone(),
auth_controller.clone(), auth_controller.clone(),
opt.clone(), opt.clone(),
logs.clone(),
analytics.clone(), analytics.clone(),
enable_dashboard, enable_dashboard,
) )


@@ -3,10 +3,8 @@
use std::future::{ready, Ready}; use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform}; use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error; use actix_web::Error;
use futures_util::future::LocalBoxFuture; use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer; use prometheus::HistogramTimer;
pub struct RouteMetrics; pub struct RouteMetrics;
@@ -49,27 +47,19 @@ where
fn call(&self, req: ServiceRequest) -> Self::Future { fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None; let mut histogram_timer: Option<HistogramTimer> = None;
-        // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
-        // also, the tests will fail if this is not present.
-        let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
-        let features = index_scheduler.features();
-
-        if features.check_metrics().is_ok() {
-            let request_path = req.path();
-            let is_registered_resource = req.resource_map().has_resource(request_path);
-            if is_registered_resource {
-                let request_method = req.method().to_string();
-                histogram_timer = Some(
-                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
-                        .with_label_values(&[&request_method, request_path])
-                        .start_timer(),
-                );
-                crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
-                    .with_label_values(&[&request_method, request_path])
-                    .inc();
-            }
-        };
+        let request_path = req.path();
+        let is_registered_resource = req.resource_map().has_resource(request_path);
+        if is_registered_resource {
+            let request_method = req.method().to_string();
+            histogram_timer = Some(
+                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+                    .with_label_values(&[&request_method, request_path])
+                    .start_timer(),
+            );
+            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
+                .with_label_values(&[&request_method, request_path])
+                .inc();
+        }
let fut = self.service.call(req); let fut = self.service.call(req);
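The histogram-timer pattern this middleware relies on can be shown outside actix. A stripped-down illustration (not from the diff), assuming the `prometheus` and `lazy_static` crates, with illustrative metric names:

```rust
use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, register_int_counter_vec, HistogramVec, IntCounterVec};

lazy_static! {
    static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
        "http_response_time_seconds",
        "HTTP response times",
        &["method", "path"]
    )
    .unwrap();
    static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
        "http_requests_total",
        "HTTP requests total",
        &["method", "path"]
    )
    .unwrap();
}

fn main() {
    // Start the timer when the request comes in and bump the request counter...
    let timer = HTTP_RESPONSE_TIME_SECONDS.with_label_values(&["GET", "/indexes"]).start_timer();
    HTTP_REQUESTS_TOTAL.with_label_values(&["GET", "/indexes"]).inc();

    // ...do the work...
    std::thread::sleep(std::time::Duration::from_millis(5));

    // ...and record the elapsed time once the response is ready.
    timer.observe_duration();
}
```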


@@ -20,8 +20,7 @@ use rustls::server::{
use rustls::RootCertStore; use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use sysinfo::{RefreshKind, System, SystemExt};
use url::Url;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@@ -29,8 +28,6 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV"; const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -51,13 +48,9 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -80,39 +73,6 @@ const DEFAULT_LOG_EVERY_N: usize = 100_000;
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum LogMode {
#[default]
Human,
Json,
}
impl Display for LogMode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
LogMode::Human => Display::fmt("HUMAN", f),
LogMode::Json => Display::fmt("JSON", f),
}
}
}
impl FromStr for LogMode {
type Err = LogModeError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.trim().to_lowercase().as_str() {
"human" => Ok(LogMode::Human),
"json" => Ok(LogMode::Json),
_ => Err(LogModeError(s.to_owned())),
}
}
}
#[derive(Debug, thiserror::Error)]
#[error("Unsupported log mode level `{0}`. Supported values are `HUMAN` and `JSON`.")]
pub struct LogModeError(String);
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")] #[serde(rename_all = "UPPERCASE")]
pub enum LogLevel { pub enum LogLevel {
@@ -194,14 +154,6 @@ pub struct Opt {
#[serde(default = "default_env")] #[serde(default = "default_env")]
pub env: String, pub env: String,
/// Called whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
pub task_webhook_url: Option<Url>,
/// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
pub task_webhook_authorization_header: Option<String>,
/// Deactivates Meilisearch's built-in telemetry when provided. /// Deactivates Meilisearch's built-in telemetry when provided.
/// ///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag. /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -344,30 +296,11 @@ pub struct Opt {
#[serde(default)] #[serde(default)]
pub experimental_enable_metrics: bool, pub experimental_enable_metrics: bool,
/// Experimental logs mode feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/723>
///
/// Change the mode of the logs on the console.
#[clap(long, env = MEILI_EXPERIMENTAL_LOGS_MODE, default_value_t)]
#[serde(default)]
pub experimental_logs_mode: LogMode,
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
///
/// Enables the log routes on the `POST /logs/stream`, `POST /logs/stderr` endpoints, and the `DELETE /logs/stream` to stop receiving logs.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652> /// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)] #[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool, pub experimental_reduce_indexing_memory_usage: bool,
/// Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
#[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
#[serde(flatten)] #[serde(flatten)]
#[clap(flatten)] #[clap(flatten)]
pub indexer_options: IndexerOpts, pub indexer_options: IndexerOpts,
@@ -435,12 +368,9 @@ impl Opt {
http_addr, http_addr,
master_key, master_key,
env, env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _, max_index_size: _,
max_task_db_size: _, max_task_db_size: _,
http_payload_size_limit, http_payload_size_limit,
experimental_max_number_of_batched_tasks,
ssl_cert_path, ssl_cert_path,
ssl_key_path, ssl_key_path,
ssl_auth_path, ssl_auth_path,
@@ -462,10 +392,8 @@ impl Opt {
config_file_path: _, config_file_path: _,
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
no_analytics, no_analytics,
-            experimental_enable_metrics,
-            experimental_logs_mode,
-            experimental_enable_logs_route,
-            experimental_reduce_indexing_memory_usage,
+            experimental_enable_metrics: enable_metrics_route,
+            experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
} = self; } = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -473,16 +401,6 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
} }
export_to_env_if_not_present(MEILI_ENV, env); export_to_env_if_not_present(MEILI_ENV, env);
if let Some(task_webhook_url) = task_webhook_url {
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
}
if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
export_to_env_if_not_present(
MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
task_webhook_authorization_header,
);
}
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
{ {
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@@ -491,10 +409,6 @@ impl Opt {
MEILI_HTTP_PAYLOAD_SIZE_LIMIT, MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
http_payload_size_limit.to_string(), http_payload_size_limit.to_string(),
); );
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
if let Some(ssl_cert_path) = ssl_cert_path { if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
} }
@@ -519,19 +433,11 @@ impl Opt {
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string()); export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
         export_to_env_if_not_present(
             MEILI_EXPERIMENTAL_ENABLE_METRICS,
-            experimental_enable_metrics.to_string(),
-        );
-        export_to_env_if_not_present(
-            MEILI_EXPERIMENTAL_LOGS_MODE,
-            experimental_logs_mode.to_string(),
-        );
-        export_to_env_if_not_present(
-            MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
-            experimental_enable_logs_route.to_string(),
+            enable_metrics_route.to_string(),
         );
         export_to_env_if_not_present(
             MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
-            experimental_reduce_indexing_memory_usage.to_string(),
+            reduce_indexing_memory_usage.to_string(),
         );
indexer_options.export_to_env(); indexer_options.export_to_env();
} }
@@ -583,10 +489,7 @@ impl Opt {
} }
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
-        InstanceTogglableFeatures {
-            metrics: self.experimental_enable_metrics,
-            logs_route: self.experimental_enable_logs_route,
-        }
+        InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
} }
} }
@@ -695,8 +598,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported. /// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> { fn total_memory_bytes() -> Option<u64> {
-    if sysinfo::IS_SUPPORTED_SYSTEM {
-        let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
+    if System::IS_SUPPORTED {
+        let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind); let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory(); system.refresh_memory();
Some(system.total_memory()) Some(system.total_memory())
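For reference, the left-hand (newer, sysinfo 0.30) call pattern from the hunk above as a self-contained program; the right-hand side uses the older 0.29 `SystemExt`/`RefreshKind::new().with_memory()` API instead. This is only a sketch assuming the `sysinfo` crate at 0.30:

```rust
use sysinfo::{MemoryRefreshKind, RefreshKind, System};

fn total_memory_bytes() -> Option<u64> {
    if sysinfo::IS_SUPPORTED_SYSTEM {
        // Only refresh RAM numbers; skip CPUs, disks, processes, etc.
        let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
        let mut system = System::new_with_specifics(memory_kind);
        system.refresh_memory();
        Some(system.total_memory())
    } else {
        None
    }
}

fn main() {
    match total_memory_bytes() {
        Some(bytes) => println!("total memory: {bytes} bytes"),
        None => println!("unsupported system"),
    }
}
```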
@@ -824,10 +727,6 @@ fn default_http_payload_size_limit() -> Byte {
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
} }
fn default_limit_batched_tasks() -> usize {
usize::MAX
}
fn default_snapshot_dir() -> PathBuf { fn default_snapshot_dir() -> PathBuf {
PathBuf::from(DEFAULT_SNAPSHOT_DIR) PathBuf::from(DEFAULT_SNAPSHOT_DIR)
} }


@@ -1,11 +1,11 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@@ -32,6 +32,6 @@ pub async fn create_dump(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create dump"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
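Handlers like `create_dump` above hand the registration off to a blocking thread because it touches the task store synchronously. A minimal sketch of that `spawn_blocking` pattern, assuming `tokio` with the `macros` and runtime features; `register_task` and `SummarizedTask` are stand-ins, not the real scheduler types:

```rust
#[derive(Debug)]
struct SummarizedTask {
    uid: u32,
    kind: &'static str,
}

fn register_task(kind: &'static str) -> Result<SummarizedTask, std::io::Error> {
    // Pretend this blocks on a write transaction.
    Ok(SummarizedTask { uid: 0, kind })
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // First `?` handles the JoinError, second `?` the registration error.
    let task = tokio::task::spawn_blocking(move || register_task("dumpCreation")).await??;
    println!("returns: uid={} kind={}", task.uid, task.kind);
    Ok(())
}
```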


@@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use deserr::Deserr; use deserr::Deserr;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
@@ -29,26 +29,21 @@ async fn get_features(
>, >,
req: HttpRequest, req: HttpRequest,
analytics: Data<dyn Analytics>, analytics: Data<dyn Analytics>,
-) -> HttpResponse {
-    let features = index_scheduler.features();
+) -> Result<HttpResponse, ResponseError> {
+    let features = index_scheduler.features()?;

     analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
-    let features = features.runtime_features();
-    debug!(returns = ?features, "Get features");
-    HttpResponse::Ok().json(features)
+    debug!("returns: {:?}", features.runtime_features());
+    Ok(HttpResponse::Ok().json(features.runtime_features()))
} }
#[derive(Debug, Deserr)] #[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct RuntimeTogglableFeatures { pub struct RuntimeTogglableFeatures {
#[deserr(default)]
pub score_details: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub vector_store: Option<bool>, pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
} }
async fn patch_features( async fn patch_features(
@@ -60,41 +55,29 @@ async fn patch_features(
req: HttpRequest, req: HttpRequest,
analytics: Data<dyn Analytics>, analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features(); let features = index_scheduler.features()?;
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features(); let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures { let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics. // the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-    let meilisearch_types::features::RuntimeTogglableFeatures {
-        vector_store,
-        metrics,
-        logs_route,
-        export_puffin_reports,
-    } = new_features;
+    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
+        new_features;
analytics.publish( analytics.publish(
"Experimental features Updated".to_string(), "Experimental features Updated".to_string(),
json!({ json!({
"score_details": score_details,
"vector_store": vector_store, "vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}), }),
Some(&req), Some(&req),
); );
index_scheduler.put_runtime_features(new_features)?; index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features)) Ok(HttpResponse::Ok().json(new_features))
} }


@@ -3,11 +3,12 @@ use std::io::ErrorKind;
use actix_web::http::header::CONTENT_TYPE; use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice as _; use bstr::ByteSlice;
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr; use deserr::Deserr;
use futures::StreamExt; use futures::StreamExt;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -27,7 +28,6 @@ use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
use tokio::fs::File; use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@@ -101,7 +101,6 @@ pub async fn get_document(
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner(); let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?; let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@@ -111,7 +110,7 @@ pub async fn get_document(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!(returns = ?document, "Get document"); debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document)) Ok(HttpResponse::Ok().json(document))
} }
@@ -132,7 +131,7 @@ pub async fn delete_document(
}; };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete document"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -169,8 +168,9 @@ pub async fn documents_by_query_post(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner(); let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents( analytics.post_fetch_documents(
&DocumentFetchKind::Normal { &DocumentFetchKind::Normal {
@@ -191,7 +191,7 @@ pub async fn get_documents(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET"); debug!("called with params: {:?}", params);
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@@ -235,7 +235,7 @@ fn documents_by_query(
let ret = PaginationView::new(offset, limit, total as usize, documents); let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!(returns = ?ret, "Get documents"); debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret)) Ok(HttpResponse::Ok().json(ret))
} }
@@ -271,7 +271,7 @@ pub async fn replace_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!(parameters = ?params, "Replace documents"); debug!("called with params: {:?}", params);
let params = params.into_inner(); let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req); analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -288,7 +288,6 @@ pub async fn replace_documents(
allow_index_creation, allow_index_creation,
) )
.await?; .await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -303,8 +302,8 @@ pub async fn update_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner(); let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req); analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -320,7 +319,6 @@ pub async fn update_documents(
allow_index_creation, allow_index_creation,
) )
.await?; .await?;
debug!(returns = ?task, "Update documents");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -429,10 +427,7 @@ async fn document_addition(
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {} if e.kind() == ErrorKind::NotFound => {}
Err(e) => { Err(e) => {
tracing::warn!( log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
index_uuid = %uuid,
"Unknown error happened while deleting a malformed update file: {e}"
);
} }
} }
// We still want to return the original error to the end user. // We still want to return the original error to the end user.
@@ -458,6 +453,7 @@ async fn document_addition(
} }
}; };
debug!("returns: {:?}", task);
Ok(task.into()) Ok(task.into())
} }
@@ -468,7 +464,7 @@ pub async fn delete_documents_batch(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch"); debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@@ -483,7 +479,7 @@ pub async fn delete_documents_batch(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by batch"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -501,7 +497,7 @@ pub async fn delete_documents_by_filter(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter"); debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner(); let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter; let filter = body.into_inner().filter;
@@ -519,7 +515,7 @@ pub async fn delete_documents_by_filter(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by filter"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -536,7 +532,7 @@ pub async fn clear_all_documents(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all documents"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -616,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index let internal_id = index
.external_documents_ids() .external_documents_ids(&txn)?
.get(&txn, doc_id)? .get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index let document = index


@@ -2,20 +2,20 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use serde_json::Value; use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::search::{ use crate::search::{
-    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
-    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
}; };
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -36,8 +36,6 @@ pub struct FacetSearchQuery {
pub q: Option<String>, pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)] #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>, pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)] #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
@@ -56,7 +54,7 @@ pub async fn search(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner(); let query = params.into_inner();
debug!(parameters = ?query, "Facet search"); debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req); let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@@ -70,7 +68,7 @@ pub async fn search(
} }
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features(); let features = index_scheduler.features()?;
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, features) perform_facet_search(&index, search_query, facet_query, facet_name, features)
}) })
@@ -83,7 +81,7 @@ pub async fn search(
let search_result = search_result?; let search_result = search_result?;
debug!(returns = ?search_result, "Facet search"); debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }
@@ -97,7 +95,6 @@ impl From<FacetSearchQuery> for SearchQuery {
filter, filter,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid,
} = value; } = value;
SearchQuery { SearchQuery {
@@ -122,7 +119,6 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy, matching_strategy,
vector, vector,
attributes_to_search_on, attributes_to_search_on,
hybrid,
} }
} }
} }


@@ -5,6 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef}; use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
@@ -15,7 +16,6 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize; use serde::Serialize;
use serde_json::json; use serde_json::json;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics; use crate::analytics::Analytics;
@@ -93,7 +93,6 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>, paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters(); let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> = let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> { index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@@ -106,7 +105,7 @@ pub async fn list_indexes(
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect(); let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!(returns = ?ret, "List indexes"); debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret)) Ok(HttpResponse::Ok().json(ret))
} }
@@ -125,7 +124,6 @@ pub async fn create_index(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner(); let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@@ -139,7 +137,6 @@ pub async fn create_index(
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} else { } else {
@@ -180,7 +177,7 @@ pub async fn get_index(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!(returns = ?index_view, "Get index"); debug!("returns: {:?}", index_view);
Ok(HttpResponse::Ok().json(index_view)) Ok(HttpResponse::Ok().json(index_view))
} }
@@ -192,7 +189,7 @@ pub async fn update_index(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index"); debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner(); let body = body.into_inner();
analytics.publish( analytics.publish(
@@ -209,7 +206,7 @@ pub async fn update_index(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update index"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -221,7 +218,6 @@ pub async fn delete_index(
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -259,6 +255,6 @@ pub async fn get_index_stats(
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!(returns = ?stats, "Get index stats"); debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats)) Ok(HttpResponse::Ok().json(stats))
} }


@@ -2,25 +2,23 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value; use serde_json::Value;
use tracing::{debug, warn};
use crate::analytics::{Analytics, SearchAggregator}; use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{ use crate::search::{
-    add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
-    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
}; };
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -76,31 +74,6 @@ pub struct SearchQueryGet {
matching_strategy: MatchingStrategy, matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
pub attributes_to_search_on: Option<CS<String>>, pub attributes_to_search_on: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatioGet(SemanticRatio);
impl std::convert::TryFrom<String> for SemanticRatioGet {
type Error = InvalidSearchSemanticRatio;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
}
}
impl std::ops::Deref for SemanticRatioGet {
type Target = SemanticRatio;
fn deref(&self) -> &Self::Target {
&self.0
}
} }
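`SemanticRatioGet` above wraps a `SemanticRatio` whose bounds live in search.rs, outside this diff. A hedged sketch of the kind of validation involved — the ratio arrives as query-string text, is parsed to a float, and is assumed here to be constrained to [0.0, 1.0]:

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
struct SemanticRatio(f32);

impl TryFrom<f32> for SemanticRatio {
    type Error = String;

    fn try_from(f: f32) -> Result<Self, Self::Error> {
        // Assumed bound for illustration; the real bound is defined in search.rs.
        if (0.0..=1.0).contains(&f) {
            Ok(SemanticRatio(f))
        } else {
            Err(format!("semantic ratio must be within [0.0, 1.0], got {f}"))
        }
    }
}

fn parse_query_param(s: &str) -> Result<SemanticRatio, String> {
    let f: f32 = s.parse().map_err(|_| format!("`{s}` is not a float"))?;
    SemanticRatio::try_from(f)
}

fn main() {
    assert!(parse_query_param("0.5").is_ok());
    assert!(parse_query_param("1.5").is_err());
    assert!(parse_query_param("half").is_err());
}
```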
impl From<SearchQueryGet> for SearchQuery { impl From<SearchQueryGet> for SearchQuery {
@@ -113,20 +86,6 @@ impl From<SearchQueryGet> for SearchQuery {
None => None, None => None,
}; };
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None,
(None, Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
}
(Some(embedder), None) => Some(HybridQuery {
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
embedder: Some(embedder),
}),
(Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
}
};
Self { Self {
q: other.q, q: other.q,
vector: other.vector.map(CS::into_inner), vector: other.vector.map(CS::into_inner),
@@ -149,7 +108,6 @@ impl From<SearchQueryGet> for SearchQuery {
crop_marker: other.crop_marker, crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy, matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
} }
} }
} }
@@ -186,7 +144,7 @@ pub async fn search_with_url_query(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get"); debug!("called with params: {:?}", params);
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into(); let mut query: SearchQuery = params.into_inner().into();
@@ -199,13 +157,9 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req); let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features();
-
-    let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
-
+    let features = index_scheduler.features()?;
     let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
-            .await?;
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
} }
@@ -213,7 +167,7 @@ pub async fn search_with_url_query(
let search_result = search_result?; let search_result = search_result?;
debug!(returns = ?search_result, "Search get"); debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }
@@ -227,7 +181,7 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner(); let mut query = params.into_inner();
debug!(parameters = ?query, "Search post"); debug!("search called with params: {:?}", query);
// Tenant token search_rules. // Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -238,13 +192,9 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features();
-
-    let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
-
+    let features = index_scheduler.features()?;
     let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
-            .await?;
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
} }
@@ -252,84 +202,10 @@ pub async fn search_with_post(
let search_result = search_result?; let search_result = search_result?;
debug!(returns = ?search_result, "Search post"); debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }
pub async fn embed(
query: &mut SearchQuery,
index_scheduler: &IndexScheduler,
index: &milli::Index,
) -> Result<Option<DistributionShift>, ResponseError> {
match (&query.hybrid, &query.vector, &query.q) {
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
if !q.trim().is_empty() =>
{
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder = if let Some(embedder_name) = embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
};
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
.map_err(milli::Error::from)?
.0;
let distribution = embedder.distribution();
let embeddings = embedder
.embed(vec![q.to_owned()])
.await
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
.pop()
.expect("No vector returned from embedding");
if embeddings.iter().nth(1).is_some() {
warn!("Ignoring embeddings past the first one in long search query");
query.vector = Some(embeddings.iter().next().unwrap().to_vec());
} else {
query.vector = Some(embeddings.into_inner());
}
Ok(distribution)
}
(Some(hybrid), vector, _) => {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder = if let Some(embedder_name) = &hybrid.embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
};
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
.map_err(milli::Error::from)?
.0;
if let Some(vector) = vector {
if vector.len() != embedder.dimensions() {
return Err(meilisearch_types::milli::Error::UserError(
meilisearch_types::milli::UserError::InvalidVectorDimensions {
expected: embedder.dimensions(),
found: vector.len(),
},
)
.into());
}
}
Ok(embedder.distribution())
}
_ => Ok(None),
}
}
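One step inside `embed` above is the dimension check on a user-supplied vector. A simplified, self-contained version of that check; the error type and names are illustrative, not milli's:

```rust
#[derive(Debug, PartialEq)]
enum SearchError {
    InvalidVectorDimensions { expected: usize, found: usize },
}

fn check_dimensions(vector: &[f32], embedder_dimensions: usize) -> Result<(), SearchError> {
    // A vector can only be compared against embeddings of the same size.
    if vector.len() != embedder_dimensions {
        return Err(SearchError::InvalidVectorDimensions {
            expected: embedder_dimensions,
            found: vector.len(),
        });
    }
    Ok(())
}

fn main() {
    assert!(check_dimensions(&[0.1, 0.2, 0.3], 3).is_ok());
    assert_eq!(
        check_dimensions(&[0.1, 0.2], 3),
        Err(SearchError::InvalidVectorDimensions { expected: 3, found: 2 })
    );
}
```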
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;


@@ -2,15 +2,14 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@@ -24,12 +23,12 @@ macro_rules! make_setting_route {
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource}; use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics; use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData; use $crate::extractors::authentication::GuardedData;
@@ -61,7 +60,7 @@ macro_rules! make_setting_route {
.await?? .await??
.into(); .into();
debug!(returns = ?task, "Delete settings"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -78,9 +77,7 @@ macro_rules! make_setting_route {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner(); let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req); $analytics(&body, &req);
let new_settings = Settings { let new_settings = Settings {
@@ -91,11 +88,6 @@ macro_rules! make_setting_route {
..Default::default() ..Default::default()
}; };
let new_settings = $crate::routes::indexes::settings::validate_settings(
new_settings,
&index_scheduler,
)?;
let allow_index_creation = let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid); index_scheduler.filters().allow_index_creation(&index_uid);
@@ -110,7 +102,7 @@ macro_rules! make_setting_route {
.await?? .await??
.into(); .into();
debug!(returns = ?task, "Update settings"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -127,7 +119,7 @@ macro_rules! make_setting_route {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?; let settings = settings(&index, &rtxn)?;
debug!(returns = ?settings, "Update settings"); debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings); let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take(); let val = json[$camelcase_attr].take();
@@ -442,31 +434,6 @@ make_setting_route!(
} }
); );
make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
analytics,
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"ProximityPrecision Updated".to_string(),
json!({
"proximity_precision": {
"set": precision.is_some(),
"value": precision.unwrap_or_default(),
}
}),
Some(req),
);
}
);
make_setting_route!( make_setting_route!(
"/ranking-rules", "/ranking-rules",
put, put,
@@ -553,67 +520,6 @@ make_setting_route!(
} }
); );
make_setting_route!(
"/embedders",
patch,
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
embedders,
"embedders",
analytics,
|setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
analytics.publish(
"Embedders Updated".to_string(),
serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
Some(req),
);
}
);
fn embedder_analytics(
setting: Option<
&std::collections::BTreeMap<
String,
Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
>,
>,
) -> serde_json::Value {
let mut sources = std::collections::HashSet::new();
if let Some(s) = &setting {
for source in s
.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.source.set())
{
use meilisearch_types::milli::vector::settings::EmbedderSource;
match source {
EmbedderSource::OpenAi => sources.insert("openAi"),
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
EmbedderSource::UserProvided => sources.insert("userProvided"),
};
}
};
let document_template_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.document_template.set().is_some())
});
json!(
{
"total": setting.as_ref().map(|s| s.len()),
"sources": sources,
"document_template_used": document_template_used,
}
)
}
macro_rules! generate_configure { macro_rules! generate_configure {
($($mod:ident),*) => { ($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -634,7 +540,6 @@ generate_configure!(
displayed_attributes, displayed_attributes,
searchable_attributes, searchable_attributes,
distinct_attribute, distinct_attribute,
proximity_precision,
stop_words, stop_words,
separator_tokens, separator_tokens,
non_separator_tokens, non_separator_tokens,
@@ -643,8 +548,7 @@ generate_configure!(
ranking_rules, ranking_rules,
typo_tolerance, typo_tolerance,
pagination, pagination,
faceting, faceting
embedders
); );
pub async fn update_all( pub async fn update_all(
@@ -657,8 +561,6 @@ pub async fn update_all(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner(); let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish( analytics.publish(
"Settings Updated".to_string(), "Settings Updated".to_string(),
@@ -691,10 +593,6 @@ pub async fn update_all(
"distinct_attribute": { "distinct_attribute": {
"set": new_settings.distinct_attribute.as_ref().set().is_some() "set": new_settings.distinct_attribute.as_ref().set().is_some()
}, },
"proximity_precision": {
"set": new_settings.proximity_precision.as_ref().set().is_some(),
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
},
"typo_tolerance": { "typo_tolerance": {
"enabled": new_settings.typo_tolerance "enabled": new_settings.typo_tolerance
.as_ref() .as_ref()
@@ -754,7 +652,6 @@ pub async fn update_all(
"synonyms": { "synonyms": {
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
}, },
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
}), }),
Some(&req), Some(&req),
); );
@@ -770,7 +667,7 @@ pub async fn update_all(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update all settings"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@@ -783,7 +680,7 @@ pub async fn get_all(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?; let new_settings = settings(&index, &rtxn)?;
debug!(returns = ?new_settings, "Get all settings"); debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings)) Ok(HttpResponse::Ok().json(new_settings))
} }
@@ -806,16 +703,6 @@ pub async fn delete_all(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all settings"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
fn validate_settings(
settings: Settings<Unchecked>,
index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> {
if matches!(settings.embedders, Setting::Set(_)) {
index_scheduler.features().check_vector("Passing `embedders` in settings")?
}
Ok(settings.validate()?)
}

View File

@@ -1,318 +0,0 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::{LogRouteHandle, LogStderrHandle};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
)
.service(web::resource("stderr").route(web::post().to(SeqHandler(update_stderr_target))));
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Json,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
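`MyTargets` exists only to reject the empty string before delegating to `tracing_subscriber`'s own `Targets` parser. A rough standalone sketch of that behaviour (assumes a `tracing-subscriber` dependency; the error type is simplified to a `String`):

```rust
use std::str::FromStr;

use tracing_subscriber::filter::Targets;

// Parse a filter string such as "index_scheduler=info,milli=trace",
// refusing the empty string instead of silently producing an empty filter.
fn parse_targets(s: &str) -> Result<Targets, String> {
    if s.is_empty() {
        Err("empty string is not a valid target; use `OFF` to disable logs".to_string())
    } else {
        Targets::from_str(s).map_err(|err| err.to_string())
    }
}

fn main() {
    assert!(parse_targets("index_scheduler=info,milli=trace").is_ok());
    assert!(parse_targets("").is_err());
}
```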
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep a handle on the logs to make them available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
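`byte_stream` turns the receiving half of an unbounded channel into a `Stream` by threading the receiver (and the guard) through `futures_util::stream::unfold`. A minimal sketch of the same pattern, without the guard and with plain `Vec<u8>` items (assumes `tokio` with the `rt`, `macros`, and `sync` features plus `futures-util`):

```rust
use futures_util::StreamExt;
use tokio::sync::mpsc;

// Turn an unbounded receiver into a stream: each step awaits one message and
// hands the receiver back as the state for the next step; `None` ends the stream.
fn byte_stream(receiver: mpsc::UnboundedReceiver<Vec<u8>>) -> impl futures_util::Stream<Item = Vec<u8>> {
    futures_util::stream::unfold(receiver, |mut receiver| async move {
        receiver.recv().await.map(|buf| (buf, receiver))
    })
}

#[tokio::main]
async fn main() {
    let (sender, receiver) = mpsc::unbounded_channel();
    sender.send(b"log line".to_vec()).unwrap();
    drop(sender); // closing the channel terminates the stream

    let collected: Vec<Vec<u8>> = byte_stream(receiver).collect().await;
    assert_eq!(collected, vec![b"log line".to_vec()]);
}
```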
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Json => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.json()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
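`entry_stream` above accumulates trace entries into roughly 8 KiB chunks by repeatedly calling `recv_many` under a one-second timeout, so slow producers still flush regularly. A small sketch of that batching idea with plain strings (assumes tokio 1.37+, where `recv_many` is available on unbounded receivers):

```rust
use tokio::sync::mpsc;
use tokio::time::{timeout, Duration};

#[tokio::main]
async fn main() {
    let (sender, mut receiver) = mpsc::unbounded_channel();
    for i in 0..5 {
        sender.send(format!("entry {i}")).unwrap();
    }
    drop(sender);

    let mut batch = Vec::new();
    // Wait at most one second for up to 100 entries, mirroring the per-chunk loop above.
    match timeout(Duration::from_secs(1), receiver.recv_many(&mut batch, 100)).await {
        Ok(0) => println!("channel closed, nothing left to flush"),
        Ok(count) => println!("flushing a batch of {count} entries: {batch:?}"),
        Err(_elapsed) => println!("timed out, flushing {} entries anyway", batch.len()),
    }
}
```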
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateStderrLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
}
pub async fn update_stderr_target(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogStderrHandle>,
body: AwebJson<UpdateStderrLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
logs.modify(|layer| {
*layer.filter_mut() = opt.target.0.clone();
})
.unwrap();
Ok(HttpResponse::NoContent().finish())
}

View File

@@ -19,7 +19,7 @@ pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>, auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_metrics()?; index_scheduler.features()?.check_metrics()?;
let auth_filters = index_scheduler.filters(); let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() { if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken); let mut error = ResponseError::from(AuthenticationError::InvalidToken);

View File

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::settings::{Settings, Unchecked};
@@ -10,7 +11,6 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@@ -22,7 +22,6 @@ mod api_key;
mod dump; mod dump;
pub mod features; pub mod features;
pub mod indexes; pub mod indexes;
mod logs;
mod metrics; mod metrics;
mod multi_search; mod multi_search;
mod snapshot; mod snapshot;
@@ -32,7 +31,6 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure)) cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health))) .service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure)) .service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure)) .service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure)) .service(web::scope("/snapshots").configure(snapshot::configure))
@@ -252,7 +250,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!(returns = ?stats, "Get stats"); debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats)) Ok(HttpResponse::Ok().json(stats))
} }

View File

@@ -3,17 +3,16 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse}; use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use serde::Serialize; use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::embed;
use crate::search::{ use crate::search::{
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex, add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
}; };
@@ -42,56 +41,54 @@ pub async fn multi_search_with_post(
let queries = params.into_inner().queries; let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req); let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features(); let features = index_scheduler.features()?;
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes. // changes.
let search_results: Result<_, (ResponseError, usize)> = async { let search_results: Result<_, (ResponseError, usize)> = (|| {
let mut search_results = Vec::with_capacity(queries.len()); async {
for (query_index, (index_uid, mut query)) in let mut search_results = Vec::with_capacity(queries.len());
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() for (query_index, (index_uid, mut query)) in
{ queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
debug!(on_index = query_index, parameters = ?query, "Multi-search");
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
{ {
add_search_rules(&mut query, search_rules); debug!("multi-search #{query_index}: called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) =
index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
} }
Ok(search_results)
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let distribution = embed(&mut query, index_scheduler.get_ref(), &index)
.await
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, features, distribution)
})
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
} }
Ok(search_results) })()
}
.await; .await;
if search_results.is_ok() { if search_results.is_ok() {
@@ -107,7 +104,7 @@ pub async fn multi_search_with_post(
err err
})?; })?;
debug!(returns = ?search_results, "Multi-search"); debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
} }

View File

@@ -1,10 +1,10 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@@ -27,6 +27,6 @@ pub async fn create_snapshot(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create snapshot"); debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }

View File

@@ -8,9 +8,11 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError}; use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList}; use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView; use meilisearch_types::tasks::{
use meilisearch_types::tasks::{Kind, KindWithContent, Status}; serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
};
use serde::Serialize; use serde::Serialize;
use serde_json::json; use serde_json::json;
use time::format_description::well_known::Rfc3339; use time::format_description::well_known::Rfc3339;
@@ -35,6 +37,140 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
} }
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
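The `duration` field is only present once both timestamps exist, which `TaskView::from_task` expresses with `Option::zip`. A tiny sketch using the `time` crate (assumed to be the same crate used above):

```rust
use time::{Duration, OffsetDateTime};

fn main() {
    let started_at = Some(OffsetDateTime::UNIX_EPOCH);
    let finished_at = Some(OffsetDateTime::UNIX_EPOCH + Duration::seconds(3));

    // `zip` yields `Some((start, end))` only when both sides are `Some`.
    let duration = started_at.zip(finished_at).map(|(start, end)| end - start);
    assert_eq!(duration, Some(Duration::seconds(3)));

    // A task that never started has no duration.
    let duration = None::<OffsetDateTime>.zip(finished_at).map(|(start, end)| end - start);
    assert_eq!(duration, None);
}
```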
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
#[derive(Debug, Deserr)] #[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery { pub struct TasksFilterQuery {

View File

@@ -7,21 +7,24 @@ use deserr::Deserr;
use either::Either; use either::Either;
use index_scheduler::RoFeatures; use index_scheduler::RoFeatures;
use indexmap::IndexMap; use indexmap::IndexMap;
use log::warn;
use meilisearch_auth::IndexSearchRules; use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn; use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::milli::{
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues}; dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document}; use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder; use milli::tokenizer::TokenizerBuilder;
use milli::{ use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
}; };
use ordered_float::OrderedFloat;
use regex::Regex; use regex::Regex;
use serde::Serialize; use serde::Serialize;
use serde_json::{json, Value}; use serde_json::{json, Value};
@@ -36,7 +39,6 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
#[derive(Debug, Clone, Default, PartialEq, Deserr)] #[derive(Debug, Clone, Default, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@@ -45,8 +47,6 @@ pub struct SearchQuery {
pub q: Option<String>, pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)] #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>, pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize, pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -87,48 +87,6 @@ pub struct SearchQuery {
pub attributes_to_search_on: Option<Vec<String>>, pub attributes_to_search_on: Option<Vec<String>>,
} }
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
pub struct HybridQuery {
/// TODO validate that semantic ratio is between 0.0 and 1.0
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
pub semantic_ratio: SemanticRatio,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatio(f32);
impl Default for SemanticRatio {
fn default() -> Self {
DEFAULT_SEMANTIC_RATIO()
}
}
impl std::convert::TryFrom<f32> for SemanticRatio {
type Error = InvalidSearchSemanticRatio;
fn try_from(f: f32) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchSemanticRatio)
} else {
Ok(SemanticRatio(f))
}
}
}
impl std::ops::Deref for SemanticRatio {
type Target = f32;
fn deref(&self) -> &Self::Target {
&self.0
}
}
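The `SemanticRatio` newtype above simply guarantees at deserialization time that the value stays inside `[0.0, 1.0]`, where 0.0 means keyword-only and 1.0 means vector-only search. A standalone sketch of the same check (hypothetical error type, no `deserr` wiring):

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
struct SemanticRatio(f32);

#[derive(Debug, PartialEq)]
struct InvalidSearchSemanticRatio;

impl TryFrom<f32> for SemanticRatio {
    type Error = InvalidSearchSemanticRatio;

    fn try_from(f: f32) -> Result<Self, Self::Error> {
        // Accept only ratios in [0.0, 1.0].
        if (0.0..=1.0).contains(&f) {
            Ok(SemanticRatio(f))
        } else {
            Err(InvalidSearchSemanticRatio)
        }
    }
}

fn main() {
    assert_eq!(SemanticRatio::try_from(0.5), Ok(SemanticRatio(0.5)));
    assert_eq!(SemanticRatio::try_from(1.5), Err(InvalidSearchSemanticRatio));
}
```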
impl SearchQuery { impl SearchQuery {
pub fn is_finite_pagination(&self) -> bool { pub fn is_finite_pagination(&self) -> bool {
self.page.or(self.hits_per_page).is_some() self.page.or(self.hits_per_page).is_some()
@@ -148,8 +106,6 @@ pub struct SearchQueryWithIndex {
pub q: Option<String>, pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)] #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub vector: Option<Vec<f32>>, pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize, pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -215,7 +171,6 @@ impl SearchQueryWithIndex {
crop_marker, crop_marker,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid,
} = self; } = self;
( (
index_uid, index_uid,
@@ -241,7 +196,6 @@ impl SearchQueryWithIndex {
crop_marker, crop_marker,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid,
// do not use ..Default::default() here, // do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
}, },
@@ -381,44 +335,19 @@ fn prepare_search<'t>(
rtxn: &'t RoTxn, rtxn: &'t RoTxn,
query: &'t SearchQuery, query: &'t SearchQuery,
features: RoFeatures, features: RoFeatures,
distribution: Option<DistributionShift>,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn); let mut search = index.search(rtxn);
if query.vector.is_some() { if query.vector.is_some() && query.q.is_some() {
features.check_vector("Passing `vector` as a query parameter")?; warn!("Ignoring the query string `q` when used with the `vector` parameter.");
} }
if query.hybrid.is_some() {
features.check_vector("Passing `hybrid` as a query parameter")?;
}
if query.hybrid.is_none() && query.q.is_some() && query.vector.is_some() {
return Err(MeilisearchHttpError::MissingSearchHybrid);
}
search.distribution_shift(distribution);
if let Some(ref vector) = query.vector { if let Some(ref vector) = query.vector {
match &query.hybrid { search.vector(vector.clone());
// If semantic ratio is 0.0, only the query search will impact the search results,
// skip the vector
Some(hybrid) if *hybrid.semantic_ratio == 0.0 => (),
_otherwise => {
search.vector(vector.clone());
}
}
} }
if let Some(ref q) = query.q { if let Some(ref query) = query.q {
match &query.hybrid { search.query(query);
// If semantic ratio is 1.0, only the vector search will impact the search results,
// skip the query
Some(hybrid) if *hybrid.semantic_ratio == 1.0 => (),
_otherwise => {
search.query(q);
}
}
} }
if let Some(ref searchable) = query.attributes_to_search_on { if let Some(ref searchable) = query.attributes_to_search_on {
@@ -431,7 +360,6 @@ fn prepare_search<'t>(
let max_total_hits = index let max_total_hits = index
.pagination_max_total_hits(rtxn) .pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination); search.exhaustive_number_hits(is_finite_pagination);
@@ -441,8 +369,12 @@ fn prepare_search<'t>(
ScoringStrategy::Skip ScoringStrategy::Skip
}); });
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid { if query.show_ranking_score_details {
search.embedder_name(embedder); features.check_score_details()?;
}
if query.vector.is_some() {
features.check_vector()?;
} }
// compute the offset on the limit depending on the pagination mode. // compute the offset on the limit depending on the pagination mode.
@@ -488,22 +420,15 @@ pub fn perform_search(
index: &Index, index: &Index,
query: SearchQuery, query: SearchQuery,
features: RoFeatures, features: RoFeatures,
distribution: Option<DistributionShift>,
) -> Result<SearchResult, MeilisearchHttpError> { ) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) = let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, features, distribution)?; prepare_search(index, &rtxn, &query, features)?;
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
match &query.hybrid { search.execute()?;
Some(hybrid) => match *hybrid.semantic_ratio {
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
ratio => search.execute_hybrid(ratio)?,
},
None => search.execute()?,
};
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -612,17 +537,13 @@ pub fn perform_search(
insert_geo_distance(sort, &mut document); insert_geo_distance(sort, &mut document);
} }
let mut semantic_score = None; let semantic_score = match query.vector.as_ref() {
for details in &score { Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
if let ScoreDetails::Vector(score_details::Vector { Some(vectors) => compute_semantic_score(vector, vectors)?,
target_vector: _, None => None,
value_similarity: Some((_matching_vector, similarity)), },
}) = details None => None,
{ };
semantic_score = Some(*similarity);
break;
}
}
let ranking_score = let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
@@ -665,7 +586,6 @@ pub fn perform_search(
let max_values_by_facet = index let max_values_by_facet = index
.max_values_per_facet(&rtxn) .max_values_per_facet(&rtxn)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET); .unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet); facet_distribution.max_values_per_facet(max_values_by_facet);
@@ -725,15 +645,11 @@ pub fn perform_facet_search(
let before_search = Instant::now(); let before_search = Instant::now();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
let mut facet_search = let mut facet_search = SearchForFacetValues::new(facet_name, search);
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
if let Some(facet_query) = &facet_query { if let Some(facet_query) = &facet_query {
facet_search.query(facet_query); facet_search.query(facet_query);
} }
if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
facet_search.max_values(max_facets as usize);
}
Ok(FacetSearchResult { Ok(FacetSearchResult {
facet_hits: facet_search.execute()?, facet_hits: facet_search.execute()?,
@@ -758,6 +674,18 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
} }
} }
fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
let vectors = serde_json::from_value(vectors)
.map(VectorOrArrayOfVectors::into_array_of_vectors)
.map_err(InternalError::SerdeJson)?;
Ok(vectors
.into_iter()
.flatten()
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
.max()
.map(OrderedFloat::into_inner))
}
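`compute_semantic_score` above takes the best dot-product similarity between the query vector and any of the document's vectors, wrapping the scores in `OrderedFloat` so that `max()` works on floats. A rough standalone sketch (a local dot product stands in for `milli::dot_product_similarity`; assumes the `ordered-float` crate):

```rust
use ordered_float::OrderedFloat;

// Stand-in for milli::dot_product_similarity.
fn dot_product(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

// Score a document by the most similar of its vectors.
fn semantic_score(query: &[f32], document_vectors: &[Vec<f32>]) -> Option<f32> {
    document_vectors
        .iter()
        .map(|v| OrderedFloat(dot_product(query, v)))
        .max()
        .map(OrderedFloat::into_inner)
}

fn main() {
    let query = [1.0, 0.0];
    let vectors = vec![vec![0.5, 0.5], vec![0.9, 0.1]];
    assert_eq!(semantic_score(&query, &vectors), Some(0.9));
}
```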
fn compute_formatted_options( fn compute_formatted_options(
attr_to_highlight: &HashSet<String>, attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String], attr_to_crop: &[String],
@@ -885,6 +813,22 @@ fn make_document(
Ok(document) Ok(document)
} }
/// Extract the JSON value under the field name specified
/// but doesn't support nested objects.
fn extract_field(
field_name: &str,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
match field_ids_map.id(field_name) {
Some(fid) => match obkv.get(fid) {
Some(value) => Ok(serde_json::from_slice(value).map(Some)?),
None => Ok(None),
},
None => Ok(None),
}
}
fn format_fields<'a>( fn format_fields<'a>(
document: &Document, document: &Document,
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
@@ -896,14 +840,6 @@ fn format_fields<'a>(
let mut matches_position = compute_matches.then(BTreeMap::new); let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone(); let mut document = document.clone();
// reduce the formatted option list to the attributes that should be formatted,
// instead of all the attributes to display.
let formatting_fields_options: Vec<_> = formatted_options
.iter()
.filter(|(_, option)| option.should_format())
.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
.collect();
// select the attributes to retrieve // select the attributes to retrieve
let displayable_names = let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name")); displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@@ -912,15 +848,13 @@ fn format_fields<'a>(
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo` // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted. // highlighted.
// Warn: The time to compute the format list scales with the number of fields to format; let format = formatted_options
// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
// d*f where d is the total number of fields to display and f is the total number of fields to format.
let format = formatting_fields_options
.iter() .iter()
.filter(|(name, _option)| { .filter(|(field, _option)| {
let name = field_ids_map.name(**field).unwrap();
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
}) })
.map(|(_, option)| **option) .map(|(_, option)| *option)
.reduce(|acc, option| acc.merge(option)); .reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new(); let mut infos = Vec::new();
@@ -1017,7 +951,7 @@ fn format_value<'a>(
let value = matcher.format(format_options); let value = matcher.format(format_options);
Value::String(value.into_owned()) Value::String(value.into_owned())
} }
None => Value::String(s), None => Value::Number(number),
} }
} }
value => value, value => value,

View File

@@ -2,12 +2,10 @@ use std::collections::{HashMap, HashSet};
use ::time::format_description::well_known::Rfc3339; use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset}; use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};
use crate::common::{default_settings, Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> = pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@@ -59,8 +57,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"}, ("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"}, ("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},
@@ -199,9 +195,7 @@ async fn access_authorized_master_key() {
#[actix_rt::test] #[actix_rt::test]
async fn access_authorized_restricted_index() { async fn access_authorized_restricted_index() {
let dir = TempDir::new().unwrap(); let mut server = Server::new_auth().await;
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
for ((method, route), actions) in AUTHORIZATIONS.iter() { for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions { for action in actions {
// create a new API key letting only the needed action. // create a new API key letting only the needed action.

View File

@@ -5,11 +5,9 @@ pub mod service;
use std::fmt::{self, Display}; use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions}; pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string; use meili_snap::json_string;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server}; pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
@@ -64,7 +62,7 @@ impl Display for Value {
write!( write!(
f, f,
"{}", "{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }) json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
) )
} }
} }

View File

@@ -9,12 +9,10 @@ use actix_web::http::StatusCode;
use byte_unit::{Byte, ByteUnit}; use byte_unit::{Byte, ByteUnit};
use clap::Parser; use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt}; use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use tempfile::TempDir; use tempfile::TempDir;
use tokio::time::sleep; use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use super::index::Index; use super::index::Index;
use super::service::Service; use super::service::Service;
@@ -83,24 +81,10 @@ impl Server {
Response = ServiceResponse<impl MessageBody>, Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error, Error = actix_web::Error,
> { > {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
(Box::new(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
)
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
actix_web::test::init_service(create_app( actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(), self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(), self.service.auth.clone().into(),
self.service.options.clone(), self.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.service.options), analytics::MockAnalytics::new(&self.service.options),
true, true,
)) ))
@@ -218,10 +202,6 @@ impl Server {
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) { pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await self.service.patch("/experimental-features", value).await
} }
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
} }
pub fn default_settings(dir: impl AsRef<Path>) -> Opt { pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -241,7 +221,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
skip_index_budget: true, skip_index_budget: true,
..Parser::parse_from(None as Option<&str>) ..Parser::parse_from(None as Option<&str>)
}, },
experimental_enable_metrics: false, experimental_enable_metrics: true,
..Parser::parse_from(None as Option<&str>) ..Parser::parse_from(None as Option<&str>)
} }
} }

View File

@@ -5,10 +5,8 @@ use actix_web::http::StatusCode;
use actix_web::test; use actix_web::test;
use actix_web::test::TestRequest; use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use crate::common::encoder::Encoder; use crate::common::encoder::Encoder;
use crate::common::Value; use crate::common::Value;
@@ -107,24 +105,10 @@ impl Service {
} }
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
(Box::new(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
)
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
let app = test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
self.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.options),
true,
))


@@ -1760,181 +1760,6 @@ async fn add_documents_invalid_geo_field() {
"finishedAt": "[date]" "finishedAt": "[date]"
} }
"###); "###);
// The next three tests are related to #4333
// _geo has lat and lng, but lng is set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": null, "lat": 67}
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 14,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has lat and lng, but lat is set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": 35, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 15,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has lat and lng, but both are set to `null`
let documents = json!([
{
"id": "13",
"_geo": { "lng": null, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 16,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
// Related to #4333
#[actix_rt::test]
async fn add_invalid_geo_and_then_settings() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// _geo has both lat and lng set to `null`
let documents = json!([
{
"id": "11",
"_geo": { "lat": null, "lng": null },
}
]);
let (ret, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"sortableAttributes": [
"_geo"
]
},
"error": {
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]


@@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"providedIds": 0, "providedIds": 0,
"deletedDocuments": 2, "deletedDocuments": 4,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]" "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
}, },
"error": null, "error": null,


@@ -20,8 +20,6 @@ pub enum GetDump {
RubyGemsWithSettingsV4,
TestV5,
TestV6WithExperimental,
}
impl GetDump {
@@ -70,10 +68,6 @@ impl GetDump {
GetDump::TestV5 => {
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
}
GetDump::TestV6WithExperimental => exist_relative_path!(
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
)
.into(),
}
}
}


@@ -59,7 +59,6 @@ async fn import_dump_v1_movie_raw() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -220,7 +219,6 @@ async fn import_dump_v1_movie_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -367,7 +365,6 @@ async fn import_dump_v1_rubygems_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -500,7 +497,6 @@ async fn import_dump_v2_movie_raw() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -645,7 +641,6 @@ async fn import_dump_v2_movie_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -789,7 +784,6 @@ async fn import_dump_v2_rubygems_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -922,7 +916,6 @@ async fn import_dump_v3_movie_raw() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1067,7 +1060,6 @@ async fn import_dump_v3_movie_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1211,7 +1203,6 @@ async fn import_dump_v3_rubygems_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1344,7 +1335,6 @@ async fn import_dump_v4_movie_raw() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1489,7 +1479,6 @@ async fn import_dump_v4_movie_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1633,7 +1622,6 @@ async fn import_dump_v4_rubygems_with_settings() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {
@@ -1822,107 +1810,3 @@ async fn import_dump_v5() {
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
);
}
#[actix_rt::test]
async fn import_dump_v6_containing_experimental_features() {
let temp = tempfile::tempdir().unwrap();
let options = Opt {
import_dump: Some(GetDump::TestV6WithExperimental.path()),
..default_settings(temp.path())
};
let mut server = Server::new_auth_with_options(options, temp).await;
server.use_api_key("MASTER_KEY");
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
let index = server.index("movies");
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byAttribute",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
}
}
"###);
// the expected order is [1, 3, 2] instead of [3, 1, 2]
// because the attribute scale doesn't differentiate between documents 1 and 3.
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
}


@@ -1,7 +1,4 @@
use meilisearch::Opt;
use crate::common::Server;
use tempfile::TempDir;
use crate::common::{default_settings, Server};
use crate::json;
/// Feature name to test against.
@@ -18,10 +15,8 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": false,
"scoreDetails": false,
"metrics": false,
"vectorStore": false
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -30,10 +25,8 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"scoreDetails": false,
"metrics": false,
"vectorStore": true
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -42,10 +35,8 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"scoreDetails": false,
"metrics": false,
"vectorStore": true
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -55,10 +46,8 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"scoreDetails": false,
"metrics": false,
"vectorStore": true
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -68,74 +57,12 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"scoreDetails": false,
"metrics": false,
"vectorStore": true
"logsRoute": false,
"exportPuffinReports": false
}
"###);
}
#[actix_rt::test]
async fn experimental_feature_metrics() {
// instance flag for metrics enables metrics at startup
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let server = Server::new_with_options(enable_metrics).await.unwrap();
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
// metrics are not returned in json format
// so the test server will return null
meili_snap::snapshot!(response, @"null");
// disabling metrics results in invalid request
let (response, code) = server.set_features(json!({"metrics": false})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"false");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enabling metrics via HTTP results in valid request
let (response, code) = server.set_features(json!({"metrics": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"true");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
}
#[actix_rt::test]
async fn errors() {
let server = Server::new().await;
@@ -146,7 +73,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"


@@ -5,7 +5,6 @@ mod documents;
mod dumps;
mod features;
mod index;
mod logs;
mod search;
mod settings;
mod snapshot;


@@ -1,193 +0,0 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An empty target triggers our help message
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `json`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with the default mode (human), which does not support memory profiling
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `json`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.post("/logs/stderr", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}


@@ -1,99 +0,0 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
(Box::new(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
) as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "human",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of tasks
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}

Some files were not shown because too many files have changed in this diff