Compare commits

..

6 Commits

Author SHA1 Message Date
1032d82643 Merge #4140
4140: Fix bug where search with distinct attribute has wrong totalHits and totalPages r=dureuill a=vivek-26

# Pull Request

## Related issue
Fixes #4130

## What does this PR do?
This PR - 
- Fixes the bug where search with distinct attribute and pagination (no ranking) returns incorrect values for `totalHits` and `totalPages`.
- Add/update unit and integration tests.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2023-10-19 14:00:53 +00:00
18bbadf645 Add explanatory comment 2023-10-19 15:58:25 +02:00
aa2cd52797 add/update tests when search with distinct attribute & pagination with no ranking 2023-10-19 16:50:14 +05:30
460e61b853 compute all candidates correctly when skipping 2023-10-19 16:48:45 +05:30
ec90946bdf Merge #4137
4137: Update version for the next release (v1.4.2) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-19 08:52:54 +00:00
58864a4dfa Update version for the next release (v1.4.2) in Cargo.toml 2023-10-19 08:50:11 +00:00
278 changed files with 8418 additions and 7521 deletions

View File

@ -7,17 +7,19 @@ assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
<!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
## TODO

View File

@ -74,4 +74,4 @@ jobs:
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@ -1,98 +0,0 @@
name: Benchmarks (PR)
on: issue_comment
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: benchmark
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export cripcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Compute the diff of the benchmarks and send a message on the GitHub PR
- name: Compute and send a message in the PR
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt

View File

@ -2,8 +2,8 @@ name: Create issue to upgrade dependencies
on:
schedule:
# Run the first of the month, every 6 month
- cron: '0 0 1 */6 *'
# Run the first of the month, every 3 month
- cron: '0 0 1 */3 *'
workflow_dispatch:
jobs:

View File

@ -50,7 +50,7 @@ jobs:
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v3
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb

View File

@ -57,20 +57,20 @@ jobs:
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
uses: docker/metadata-action@v4
with:
images: getmeili/meilisearch
# Prevent `latest` to be updated for each new tag pushed.
@ -83,7 +83,7 @@ jobs:
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v5
uses: docker/build-push-action@v4
with:
push: true
platforms: linux/amd64,linux/arm64

View File

@ -14,7 +14,6 @@ on:
env:
MEILI_MASTER_KEY: 'masterKey'
MEILI_NO_ANALYTICS: 'true'
DISABLE_COVERAGE: 'true'
jobs:
define-docker-image:
@ -31,117 +30,6 @@ jobs:
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
- name: Docker image is ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
##########
## SDKs ##
##########
meilisearch-dotnet-tests:
needs: define-docker-image
name: .NET SDK tests
runs-on: ubuntu-latest
env:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v3
with:
dotnet-version: "6.0.x"
- name: Install dependencies
run: dotnet restore
- name: Build
run: dotnet build --configuration Release --no-restore
- name: Meilisearch (latest version) setup with Docker
run: docker compose up -d
- name: Run tests
run: dotnet test --no-restore --verbosity normal
meilisearch-dart-tests:
needs: define-docker-image
name: Dart SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
with:
sdk: 3.1.1
- name: Install dependencies
run: dart pub get
- name: Run integration tests
run: dart test --concurrency=4
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
run: |
go get -v -t -d ./...
if [ -f Gopkg.toml ]; then
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
dep ensure
fi
- name: Run integration tests
run: go test -v ./...
meilisearch-java-tests:
needs: define-docker-image
name: Java SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
uses: actions/setup-java@v3
with:
java-version: 8
distribution: 'zulu'
cache: gradle
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build and run unit and integration tests
run: ./gradlew build integrationTest
meilisearch-js-tests:
needs: define-docker-image
@ -160,7 +48,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: 'yarn'
- name: Install dependencies
@ -178,6 +66,33 @@ jobs:
- name: Run Browser env
run: yarn test:env:browser
instant-meilisearch-tests:
needs: define-docker-image
name: instant-meilisearch tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/instant-meilisearch
- name: Setup node
uses: actions/setup-node@v3
with:
cache: yarn
- name: Install dependencies
run: yarn install
- name: Run tests
run: yarn test
- name: Build all the playgrounds and the packages
run: yarn build
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
@ -196,6 +111,8 @@ jobs:
repository: meilisearch/meilisearch-php
- name: Install PHP
uses: shivammathur/setup-php@v2
with:
coverage: none
- name: Validate composer.json and composer.lock
run: composer validate
- name: Install dependencies
@ -232,6 +149,36 @@ jobs:
- name: Test with pytest
run: pipenv run pytest
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
run: |
go get -v -t -d ./...
if [ -f Gopkg.toml ]; then
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
dep ensure
fi
- name: Run integration tests
run: go test -v ./...
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
@ -277,110 +224,3 @@ jobs:
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
meilisearch-swift-tests:
needs: define-docker-image
name: Swift SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
run: swift test
########################
## FRONT-END PLUGINS ##
########################
meilisearch-js-plugins-tests:
needs: define-docker-image
name: meilisearch-js-plugins tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
with:
cache: yarn
- name: Install dependencies
run: yarn install
- name: Run tests
run: yarn test
- name: Build all the playgrounds and the packages
run: yarn build
########################
## BACK-END PLUGINS ###
########################
meilisearch-rails-tests:
needs: define-docker-image
name: meilisearch-rails tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
bundler-cache: true
- name: Run tests
run: bundle exec rspec
meilisearch-symfony-tests:
needs: define-docker-image
name: meilisearch-symfony tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP
uses: shivammathur/setup-php@v2
with:
tools: composer:v2, flex
- name: Validate composer.json and composer.lock
run: composer validate
- name: Install dependencies
run: composer install --prefer-dist --no-progress --quiet
- name: Remove doctrine/annotations
run: composer remove --dev doctrine/annotations
- name: Run test suite
run: composer test:unit

View File

@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.5.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.5.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -123,10 +123,7 @@ jobs:
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then
echo "lindera has been found in the sources and it shouldn't"
exit 1
fi
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is pressent
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
@ -149,7 +146,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.5.1
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -168,7 +165,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.5.1
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@ -187,7 +184,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.5.1
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

963
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,6 @@
resolver = "2"
members = [
"meilisearch",
"meilitool",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
@ -19,7 +18,7 @@ members = [
]
[workspace.package]
version = "1.5.0"
version = "1.4.2"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release -p meilisearch -p meilitool
cargo build --release
# Run
FROM alpine:3.16
@ -28,10 +28,9 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@ -1,14 +1,14 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.

View File

@ -6,7 +6,9 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::update::{
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
};
use milli::Index;
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
@ -264,7 +266,17 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -601,7 +613,17 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -853,31 +875,22 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
}
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
let mut wtxn = index.write_txn().unwrap();
let indexer_config = IndexerConfig::default();
for ids in document_ids_to_delete {
let config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
.unwrap();
(builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}
fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing");
group.sample_size(BENCHMARK_ITERATION);
@ -1099,7 +1112,17 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -1315,7 +1338,17 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});

View File

@ -526,12 +526,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -541,12 +541,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -571,12 +571,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -617,12 +617,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -632,12 +632,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -647,12 +647,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@ -0,0 +1,24 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -0,0 +1,38 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@ -0,0 +1,31 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -46,7 +46,6 @@ pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
@ -108,11 +107,8 @@ impl V2Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V2IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
)
}))
}
@ -147,41 +143,16 @@ pub struct V2IndexReader {
}
impl V2IndexReader {
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata {
uid: index_uuid.uid.clone(),
uid: name,
primary_key: meta.primary_key,
created_at: created_at.unwrap_or(current_time),
updated_at: updated_at.unwrap_or(current_time),
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V2IndexReader {
@ -277,12 +248,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -292,12 +263,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -322,12 +293,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -369,12 +340,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -384,12 +355,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@ -399,12 +370,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@ -227,14 +227,4 @@ impl UpdateStatus {
_ => None,
}
}
pub fn finished_at(&self) -> Option<OffsetDateTime> {
match self {
UpdateStatus::Processing(_) => None,
UpdateStatus::Enqueued(_) => None,
UpdateStatus::Processed(u) => Some(u.processed_at),
UpdateStatus::Aborted(_) => None,
UpdateStatus::Failed(u) => Some(u.failed_at),
}
}
}

View File

@ -18,12 +18,11 @@ derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
puffin = "0.16.0"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -31,7 +30,6 @@ synchronoise = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
ureq = "2.9.1"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]

View File

@ -19,18 +19,18 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info, trace};
use log::{debug, error, info};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -43,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@ -104,6 +104,12 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentDeletion {
index_uid: String,
// The vec associated with each document deletion tasks.
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
@ -155,6 +161,7 @@ impl Batch {
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@ -192,33 +199,11 @@ impl Batch {
}
}
impl fmt::Display for Batch {
/// A text used when we debug the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
@ -228,27 +213,6 @@ impl IndexOperation {
}
}
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
@ -336,27 +300,18 @@ impl IndexScheduler {
BatchKind::DocumentDeletion { deletion_ids } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let mut operations = Vec::with_capacity(tasks.len());
let mut documents_counts = Vec::with_capacity(tasks.len());
let mut documents = Vec::new();
for task in &tasks {
match task.kind {
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
operations.push(DocumentOperation::Delete(documents_ids.clone()));
documents_counts.push(documents_ids.len() as u64);
documents.push(documents_ids.clone())
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key: None,
method: IndexDocumentsMethod::ReplaceDocuments,
documents_counts,
operations,
tasks,
},
op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
must_create_index,
}))
}
@ -626,7 +581,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(batch.to_string());
puffin::profile_function!(format!("{:?}", batch));
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@ -822,10 +777,6 @@ impl IndexScheduler {
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
@ -846,9 +797,6 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
@ -888,9 +836,6 @@ impl IndexScheduler {
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
@ -903,16 +848,13 @@ impl IndexScheduler {
})?;
// 4. Dump experimental feature settings
let features = self.features().runtime_features();
let features = self.features()?.runtime_features();
dump.create_experimental_features(features)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;
@ -933,10 +875,6 @@ impl IndexScheduler {
self.index_mapper.index(&rtxn, &index_uid)?
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
*self.currently_updating_index.write().unwrap() =
Some((index_uid.clone(), index.clone()));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
index_wtxn.commit()?;
@ -1205,7 +1143,7 @@ impl IndexScheduler {
index,
indexer_config,
config,
|indexing_step| trace!("update: {:?}", indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1252,8 +1190,7 @@ impl IndexScheduler {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1264,11 +1201,23 @@ impl IndexScheduler {
unreachable!();
};
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
}
@ -1283,13 +1232,31 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| trace!("update: {:?}", indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
documents.iter().flatten().for_each(|id| {
builder.delete_external_id(id);
});
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
for (task, documents) in tasks.iter_mut().zip(documents) {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents.len(),
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
});
}
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1299,13 +1266,7 @@ impl IndexScheduler {
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
@ -1539,8 +1500,6 @@ impl IndexScheduler {
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
@ -1551,26 +1510,9 @@ fn delete_document_by_filter<'a>(
}
e => e.into(),
})?;
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
builder = new_builder;
let _ = builder.execute()?;
count
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
} else {
0
})

View File

@ -108,8 +108,6 @@ pub enum Error {
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)]
Dump(#[from] dump::Error),
@ -177,7 +175,6 @@ impl Error {
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
@ -239,9 +236,6 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}

View File

@ -1,8 +1,6 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
@ -11,19 +9,20 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
instance: InstanceTogglableFeatures,
}
#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
}
impl RoFeatures {
fn new(data: &FeatureData) -> Self {
let runtime = data.runtime_features();
Self { runtime }
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(txn)?;
Ok(Self { runtime, instance: data.instance })
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@ -44,13 +43,13 @@ impl RoFeatures {
}
pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics {
if self.instance.metrics {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
issue_link: "https://github.com/meilisearch/product/discussions/625",
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
}
.into())
}
@ -68,36 +67,15 @@ impl RoFeatures {
.into())
}
}
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
Ok(Self { runtime: runtime_features, instance: instance_features })
}
pub fn put_runtime_features(
@ -105,25 +83,16 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for an assignment here.
let mut toggled_features = self.runtime.write().unwrap();
*toggled_features = features;
Ok(())
}
fn runtime_features(&self) -> RuntimeTogglableFeatures {
// sound to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for copying the data here
*self.runtime.read().unwrap()
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
}
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(txn, self)
}
}

View File

@ -30,17 +30,14 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
auth_path: _,
version_file_path: _,
webhook_url: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
currently_updating_index: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();

View File

@ -27,14 +27,12 @@ mod index_mapper;
mod insta_snapshot;
mod lru;
mod utils;
pub mod uuid_codec;
mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@ -46,8 +44,6 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
pub use features::RoFeatures;
use file_store::FileStore;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
@ -55,9 +51,7 @@ use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
@ -173,8 +167,8 @@ impl ProcessingTasks {
}
/// Set the processing tasks to an empty list
fn stop_processing(&mut self) -> RoaringBitmap {
std::mem::take(&mut self.processing)
fn stop_processing(&mut self) {
self.processing = RoaringBitmap::new();
}
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@ -244,7 +238,6 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps.
pub dumps_path: PathBuf,
pub webhook_url: Option<String>,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@ -321,12 +314,6 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@ -339,10 +326,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -381,13 +364,10 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(),
webhook_url: self.webhook_url.clone(),
currently_updating_index: self.currently_updating_index.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)]
@ -477,15 +457,12 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
version_file_path: options.version_file_path,
webhook_url: options.webhook_url,
currently_updating_index: Arc::new(RwLock::new(None)),
#[cfg(test)]
test_breakpoint_sdr,
@ -595,46 +572,17 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{e}");
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
}
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
log::error!("{e}");
}
// We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
}
})
.unwrap();
@ -668,13 +616,6 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}
@ -1121,6 +1062,8 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
puffin::GlobalProfiler::lock().new_frame();
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1156,9 +1099,6 @@ impl IndexScheduler {
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
// Reset the currently updating index to relinquish the index handle
*self.currently_updating_index.write().unwrap() = None;
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@ -1193,8 +1133,7 @@ impl IndexScheduler {
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
| Err(Error::AbortedTask) => {
))) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?;
@ -1250,92 +1189,19 @@ impl IndexScheduler {
}
}
let processed = self.processing_tasks.write().unwrap().stop_processing();
self.processing_tasks.write().unwrap().stop_processing();
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&processed);
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
}
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl<'a, 'b> Read for TaskReader<'a, 'b> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
log::error!("While sending data to the webhook: {e}");
}
}
Ok(())
}
/// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1393,8 +1259,9 @@ impl IndexScheduler {
Ok(IndexStats { is_indexing, inner_stats: index_stats })
}
pub fn features(&self) -> RoFeatures {
self.features.features()
pub fn features(&self) -> Result<RoFeatures> {
let rtxn = self.read_txn()?;
self.features.features(rtxn)
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@ -1715,7 +1582,6 @@ mod tests {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
@ -4424,26 +4290,4 @@ mod tests {
}
"###);
}
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}

View File

@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -129,9 +129,6 @@ impl HeedAuthStore {
Action::DumpsAll => {
actions.insert(Action::DumpsCreate);
}
Action::SnapshotsAll => {
actions.insert(Action::SnapshotsCreate);
}
Action::TasksAll => {
actions.extend([Action::TasksGet, Action::TasksDelete, Action::TasksCancel]);
}

View File

@ -50,7 +50,6 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@ -324,6 +324,7 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

View File

@ -5,8 +5,6 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@ -257,12 +257,6 @@ pub enum Action {
#[serde(rename = "dumps.create")]
#[deserr(rename = "dumps.create")]
DumpsCreate,
#[serde(rename = "snapshots.*")]
#[deserr(rename = "snapshots.*")]
SnapshotsAll,
#[serde(rename = "snapshots.create")]
#[deserr(rename = "snapshots.create")]
SnapshotsCreate,
#[serde(rename = "version")]
#[deserr(rename = "version")]
Version,
@ -315,7 +309,6 @@ impl Action {
METRICS_GET => Some(Self::MetricsGet),
DUMPS_ALL => Some(Self::DumpsAll),
DUMPS_CREATE => Some(Self::DumpsCreate),
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
VERSION => Some(Self::Version),
KEYS_CREATE => Some(Self::KeysAdd),
KEYS_GET => Some(Self::KeysGet),
@ -360,7 +353,6 @@ pub mod actions {
pub const METRICS_GET: u8 = MetricsGet.repr();
pub const DUMPS_ALL: u8 = DumpsAll.repr();
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
pub const VERSION: u8 = Version.repr();
pub const KEYS_CREATE: u8 = KeysAdd.repr();
pub const KEYS_GET: u8 = KeysGet.repr();

View File

@ -9,7 +9,6 @@ pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};

View File

@ -1,139 +0,0 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}

View File

@ -69,7 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@ -104,7 +105,6 @@ walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
[dev-dependencies]
actix-rt = "2.8.0"
@ -135,6 +135,7 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
@ -151,7 +152,6 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

View File

@ -1,5 +1,6 @@
mod mock_analytics;
#[cfg(feature = "analytics")]
// if we are in release mode and the feature analytics was enabled
#[cfg(all(not(debug_assertions), feature = "analytics"))]
mod segment_analytics;
use std::fs;
@ -16,25 +17,26 @@ use serde_json::Value;
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
// if the analytics feature is disabled
// if we are in debug mode OR the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
#[cfg(not(feature = "analytics"))]
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if the feature analytics is enabled we use the real analytics
#[cfg(feature = "analytics")]
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:

File diff suppressed because it is too large Load Diff

View File

@ -114,7 +114,10 @@ pub fn create_app(
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(middleware::RouteMetrics);
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
app.wrap(
Cors::default()
.send_wildcard()
@ -228,7 +231,6 @@ fn open_or_create_database_unchecked(
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
@ -363,7 +365,7 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::trace!("update: {:?}", indexing_step),
|indexing_step| log::debug!("update: {:?}", indexing_step),
|| false,
)?;
@ -398,7 +400,6 @@ pub fn configure_data(
.app_data(web::Data::from(analytics))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {

View File

@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"

View File

@ -3,10 +3,8 @@
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;
pub struct RouteMetrics;
@ -49,27 +47,19 @@ where
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
// calling unwrap here is safe because index scheduler is added to app data while creating actix app.
// also, the tests will fail if this is not present.
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
if features.check_metrics().is_ok() {
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.inc();
}
};
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}
let fut = self.service.call(req);

View File

@ -21,7 +21,6 @@ use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
use url::Url;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@ -29,8 +28,7 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
@ -156,16 +154,12 @@ pub struct Opt {
#[serde(default = "default_env")]
pub env: String,
/// Called whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
pub task_webhook_url: Option<Url>,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
/// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
/// at any time.
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(default)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
@ -374,7 +368,6 @@ impl Opt {
http_addr,
master_key,
env,
task_webhook_url,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@ -397,7 +390,7 @@ impl Opt {
ignore_missing_dump: _,
ignore_dump_if_db_exists: _,
config_file_path: _,
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
@ -408,11 +401,7 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
if let Some(task_webhook_url) = task_webhook_url {
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
}
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
}

View File

@ -29,12 +29,12 @@ async fn get_features(
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> HttpResponse {
let features = index_scheduler.features();
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features());
HttpResponse::Ok().json(features.runtime_features())
Ok(HttpResponse::Ok().json(features.runtime_features()))
}
#[derive(Debug, Deserr)]
@ -44,10 +44,6 @@ pub struct RuntimeTogglableFeatures {
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@ -59,36 +55,26 @@ async fn patch_features(
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
export_puffin_reports,
} = new_features;
let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);

View File

@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids()
.get(&txn, doc_id)?
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index

View File

@ -68,7 +68,7 @@ pub async fn search(
}
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, features)
})

View File

@ -157,7 +157,7 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
@ -192,7 +192,7 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {

View File

@ -19,7 +19,7 @@ pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_metrics()?;
index_scheduler.features()?.check_metrics()?;
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken);

View File

@ -24,7 +24,6 @@ pub mod features;
pub mod indexes;
mod metrics;
mod multi_search;
mod snapshot;
mod swap_indexes;
pub mod tasks;
@ -33,7 +32,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))

View File

@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features();
let features = index_scheduler.features()?;
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code

View File

@ -1,32 +0,0 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
}
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));
let task = KindWithContent::SnapshotCreation;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -8,9 +8,11 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use meilisearch_types::tasks::{
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
@ -35,6 +37,140 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {

View File

@ -1,7 +1,8 @@
use std::{thread, time};
use crate::common::{Server, Value};
use crate::json;
use serde_json::{json, Value};
use crate::common::Server;
#[actix_rt::test]
async fn add_valid_api_key() {
@ -161,7 +162,7 @@ async fn add_valid_api_key_null_description() {
server.use_api_key("MASTER_KEY");
let content = json!({
"description": json!(null),
"description": Value::Null,
"indexes": ["products"],
"actions": ["documents.add"],
"expiresAt": "2050-11-13T00:00:00"
@ -364,7 +365,7 @@ async fn error_add_api_key_invalid_index_uids() {
server.use_api_key("MASTER_KEY");
let content = json!({
"description": json!(null),
"description": Value::Null,
"indexes": ["invalid index # / \\name with spaces"],
"actions": [
"documents.add"
@ -421,7 +422,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@ -506,7 +507,7 @@ async fn error_add_api_key_invalid_parameters_uid() {
async fn error_add_api_key_parameters_uid_already_exist() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let content: Value = json!({
let content = json!({
"uid": "4bc0887a-0e41-4f3b-935d-0c451dcee9c8",
"indexes": ["products"],
"actions": ["search"],
@ -1145,7 +1146,7 @@ async fn patch_api_key_description() {
meili_snap::snapshot!(code, @"200 OK");
// Remove the description
let content = json!({ "description": null });
let content = json!({ "description": serde_json::Value::Null });
let (response, code) = server.patch_api_key(&uid, content).await;
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]", ".uid" => "[ignored]", ".key" => "[ignored]" }), @r###"

View File

@ -2,13 +2,11 @@ use std::collections::{HashMap, HashSet};
use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use serde_json::{json, Value};
use time::{Duration, OffsetDateTime};
use crate::common::{default_settings, Server, Value};
use crate::json;
use crate::common::Server;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
Lazy::new(|| {
@ -56,7 +54,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/indexes/products/stats") => hashset!{"stats.get", "stats.*", "*"},
("GET", "/stats") => hashset!{"stats.get", "stats.*", "*"},
("POST", "/dumps") => hashset!{"dumps.create", "dumps.*", "*"},
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
@ -197,9 +194,7 @@ async fn access_authorized_master_key() {
#[actix_rt::test]
async fn access_authorized_restricted_index() {
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
let mut server = Server::new_auth().await;
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.

View File

@ -1,8 +1,8 @@
use meili_snap::*;
use serde_json::json;
use uuid::Uuid;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn create_api_key_bad_description() {
@ -90,7 +90,7 @@ async fn create_api_key_bad_actions() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"

View File

@ -7,9 +7,9 @@ mod tenant_token;
mod tenant_token_multi_search;
use actix_web::http::StatusCode;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
impl Server {
pub fn use_api_key(&mut self, api_key: impl AsRef<str>) {

View File

@ -3,11 +3,11 @@ use std::collections::HashMap;
use ::time::format_description::well_known::Rfc3339;
use maplit::hashmap;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use time::{Duration, OffsetDateTime};
use super::authorization::{ALL_ACTIONS, AUTHORIZATIONS};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
fn generate_tenant_token(
parent_uid: impl AsRef<str>,
@ -233,31 +233,31 @@ async fn search_authorized_simple_token() {
},
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!(null)
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"*": null}),
"exp" => json!(null)
"searchRules" => json!({"*": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!(null)
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!(null)
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": null}),
"exp" => json!(null)
"searchRules" => json!({"sales": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!(null)
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sa*"]),
"exp" => json!(null)
"exp" => Value::Null
},
];
@ -386,7 +386,7 @@ async fn error_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -398,7 +398,7 @@ async fn error_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null}),
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -428,15 +428,15 @@ async fn error_search_forbidden_token() {
},
hashmap! {
"searchRules" => json!({"products": {}}),
"exp" => json!(null)
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"products": null}),
"exp" => json!(null)
"searchRules" => json!({"products": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["products"]),
"exp" => json!(null)
"exp" => Value::Null
},
// expired token
hashmap! {
@ -444,7 +444,7 @@ async fn error_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -456,7 +456,7 @@ async fn error_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null}),
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {

View File

@ -3,11 +3,11 @@ use std::collections::HashMap;
use ::time::format_description::well_known::Rfc3339;
use maplit::hashmap;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use time::{Duration, OffsetDateTime};
use super::authorization::ALL_ACTIONS;
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
fn generate_tenant_token(
parent_uid: impl AsRef<str>,
@ -512,31 +512,31 @@ async fn single_search_authorized_simple_token() {
},
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"*": null}),
"exp" => json!(null),
"searchRules" => json!({"*": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": null}),
"exp" => json!(null),
"searchRules" => json!({"sales": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sa*"]),
"exp" => json!(null),
"exp" => Value::Null
},
];
@ -564,31 +564,31 @@ async fn multi_search_authorized_simple_token() {
},
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"*": null}),
"exp" => json!(null),
"searchRules" => json!({"*": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": {}, "products": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": null, "products": null}),
"exp" => json!(null),
"searchRules" => json!({"sales": Value::Null, "products": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sales", "products"]),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sa*", "pro*"]),
"exp" => json!(null),
"exp" => Value::Null
},
];
@ -823,7 +823,7 @@ async fn error_single_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -835,7 +835,7 @@ async fn error_single_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null}),
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -864,7 +864,7 @@ async fn error_multi_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -876,7 +876,7 @@ async fn error_multi_search_token_forbidden_parent_key() {
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null, "products": null}),
"searchRules" => json!({"sales": Value::Null, "products": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -919,15 +919,15 @@ async fn error_single_search_forbidden_token() {
},
hashmap! {
"searchRules" => json!({"products": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"products": null}),
"exp" => json!(null),
"searchRules" => json!({"products": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["products"]),
"exp" => json!(null),
"exp" => Value::Null
},
// expired token
hashmap! {
@ -935,7 +935,7 @@ async fn error_single_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -947,7 +947,7 @@ async fn error_single_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null}),
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -978,15 +978,15 @@ async fn error_multi_search_forbidden_token() {
},
hashmap! {
"searchRules" => json!({"products": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"products": null}),
"exp" => json!(null),
"searchRules" => json!({"products": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["products"]),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": {}}),
@ -998,15 +998,15 @@ async fn error_multi_search_forbidden_token() {
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!(null),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": null}),
"exp" => json!(null),
"searchRules" => json!({"sales": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!(null),
"exp" => Value::Null
},
// expired token
hashmap! {
@ -1014,7 +1014,7 @@ async fn error_multi_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": null}),
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
@ -1026,7 +1026,7 @@ async fn error_multi_search_forbidden_token() {
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": null, "products": {}}),
"searchRules" => json!({"sales": Value::Null, "products": {}}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {

View File

@ -3,13 +3,12 @@ use std::panic::{catch_unwind, resume_unwind, UnwindSafe};
use std::time::Duration;
use actix_web::http::StatusCode;
use serde_json::{json, Value};
use tokio::time::sleep;
use urlencoding::encode as urlencode;
use super::encoder::Encoder;
use super::service::Service;
use super::Value;
use crate::json;
pub struct Index<'a> {
pub uid: String,
@ -243,9 +242,7 @@ impl Index<'_> {
pub async fn delete_batch(&self, ids: Vec<u64>) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/delete-batch", urlencode(self.uid.as_ref()));
self.service
.post_encoded(url, serde_json::to_value(&ids).unwrap().into(), self.encoder)
.await
self.service.post_encoded(url, serde_json::to_value(&ids).unwrap(), self.encoder).await
}
pub async fn delete_batch_raw(&self, body: Value) -> (Value, StatusCode) {

View File

@ -3,85 +3,9 @@ pub mod index;
pub mod server;
pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct Value(pub serde_json::Value);
impl Value {
pub fn uid(&self) -> u64 {
if let Some(uid) = self["uid"].as_u64() {
uid
} else if let Some(uid) = self["taskUid"].as_u64() {
uid
} else {
panic!("Didn't find any task id in: {self}");
}
}
}
impl From<serde_json::Value> for Value {
fn from(value: serde_json::Value) -> Self {
Value(value)
}
}
impl std::ops::Deref for Value {
type Target = serde_json::Value;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl PartialEq<serde_json::Value> for Value {
fn eq(&self, other: &serde_json::Value) -> bool {
&self.0 == other
}
}
impl PartialEq<Value> for serde_json::Value {
fn eq(&self, other: &Value) -> bool {
self == &other.0
}
}
impl PartialEq<&str> for Value {
fn eq(&self, other: &&str) -> bool {
self.0.eq(other)
}
}
impl Display for Value {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}
impl From<Vec<Value>> for Value {
fn from(value: Vec<Value>) -> Self {
Self(value.into_iter().map(|value| value.0).collect::<serde_json::Value>())
}
}
#[macro_export]
macro_rules! json {
($($json:tt)+) => {
$crate::common::Value(serde_json::json!($($json)+))
};
}
/// Performs a search test on both post and get routes
#[macro_export]
macro_rules! test_post_get_search {

View File

@ -11,14 +11,13 @@ use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use tempfile::TempDir;
use tokio::time::sleep;
use super::index::Index;
use super::service::Service;
use crate::common::encoder::Encoder;
use crate::common::Value;
use crate::json;
pub struct Server {
pub service: Service,
@ -157,10 +156,6 @@ impl Server {
self.service.post("/dumps", json!(null)).await
}
pub async fn create_snapshot(&self) -> (Value, StatusCode) {
self.service.post("/snapshots", json!(null)).await
}
pub async fn index_swap(&self, value: Value) -> (Value, StatusCode) {
self.service.post("/swap-indexes", value).await
}
@ -202,10 +197,6 @@ impl Server {
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -213,7 +204,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
db_path: dir.as_ref().join("db"),
dump_dir: dir.as_ref().join("dumps"),
env: "development".to_owned(),
#[cfg(feature = "analytics")]
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: true,
max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
@ -225,7 +216,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
experimental_enable_metrics: false,
experimental_enable_metrics: true,
..Parser::parse_from(None as Option<&str>)
}
}

View File

@ -7,9 +7,9 @@ use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController;
use serde_json::Value;
use crate::common::encoder::Encoder;
use crate::common::Value;
pub struct Service {
pub index_scheduler: Arc<IndexScheduler>,

View File

@ -3,8 +3,9 @@
mod common;
use actix_web::test;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::common::Server;
enum HttpVerb {
Put,

View File

@ -1,11 +1,11 @@
use actix_web::test;
use meili_snap::{json_string, snapshot};
use serde_json::{json, Value};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value};
use crate::json;
use crate::common::{GetAllDocumentsOptions, Server};
/// This is the basic usage of our API and every other tests uses the content-type application/json
#[actix_rt::test]

View File

@ -1,7 +1,7 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
#[actix_rt::test]
async fn delete_one_document_unexisting_index() {
@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"deletedDocuments": 4,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,

View File

@ -1,8 +1,8 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn get_all_documents_bad_offset() {

View File

@ -1,11 +1,11 @@
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use serde_json::{json, Value};
use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server, Value};
use crate::json;
use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server};
// TODO: partial test since we are testing error, amd error is not yet fully implemented in
// transplant
@ -40,7 +40,7 @@ async fn get_document() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
let documents = json!([
let documents = serde_json::json!([
{
"id": 0,
"nested": { "content": "foobar" },
@ -53,7 +53,7 @@ async fn get_document() {
assert_eq!(code, 200);
assert_eq!(
response,
json!({
serde_json::json!({
"id": 0,
"nested": { "content": "foobar" },
})
@ -64,7 +64,7 @@ async fn get_document() {
assert_eq!(code, 200);
assert_eq!(
response,
json!({
serde_json::json!({
"id": 0,
})
);
@ -75,7 +75,7 @@ async fn get_document() {
assert_eq!(code, 200);
assert_eq!(
response,
json!({
serde_json::json!({
"nested": { "content": "foobar" },
})
);
@ -122,7 +122,7 @@ async fn get_all_documents_no_options() {
assert_eq!(code, 200);
let arr = response["results"].as_array().unwrap();
assert_eq!(arr.len(), 20);
let first = json!({
let first = serde_json::json!({
"id":0,
"isActive":false,
"balance":"$2,668.55",

View File

@ -1,8 +1,7 @@
use meili_snap::snapshot;
use serde_json::json;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
#[actix_rt::test]
async fn error_document_update_create_index_bad_uid() {
@ -85,13 +84,7 @@ async fn update_document() {
let (response, code) = index.get_document(1, None).await;
assert_eq!(code, 200);
snapshot!(response, @r###"
{
"doc_id": 1,
"content": "foo",
"other": "bar"
}
"###);
assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##);
}
#[actix_rt::test]
@ -129,13 +122,7 @@ async fn update_document_gzip_encoded() {
let (response, code) = index.get_document(1, None).await;
assert_eq!(code, 200);
snapshot!(response, @r###"
{
"doc_id": 1,
"content": "foo",
"other": "bar"
}
"###);
assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##);
}
#[actix_rt::test]

View File

@ -2,10 +2,10 @@ mod data;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use serde_json::json;
use self::data::GetDump;
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
use crate::json;
// all the following test are ignored on windows. See #2364
#[actix_rt::test]

View File

@ -1,8 +1,6 @@
use meilisearch::Opt;
use tempfile::TempDir;
use serde_json::json;
use crate::common::{default_settings, Server};
use crate::json;
use crate::common::Server;
/// Feature name to test against.
/// This will have to be changed by a different one when that feature is stabilized.
@ -19,9 +17,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false
"vectorStore": false
}
"###);
@ -31,9 +27,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@ -43,9 +37,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@ -56,9 +48,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@ -69,73 +59,11 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
}
#[actix_rt::test]
async fn experimental_feature_metrics() {
// instance flag for metrics enables metrics at startup
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let server = Server::new_with_options(enable_metrics).await.unwrap();
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false
}
"###);
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
// metrics are not returned in json format
// so the test server will return null
meili_snap::snapshot!(response, @"null");
// disabling metrics results in invalid request
let (response, code) = server.set_features(json!({"metrics": false})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"false");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enabling metrics via HTTP results in valid request
let (response, code) = server.set_features(json!({"metrics": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"true");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
}
#[actix_rt::test]
async fn errors() {
let server = Server::new().await;
@ -146,7 +74,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -2,10 +2,10 @@ use actix_web::http::header::ContentType;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use serde_json::{json, Value};
use crate::common::encoder::Encoder;
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
#[actix_rt::test]
async fn create_index_no_primary_key() {
@ -21,7 +21,7 @@ async fn create_index_no_primary_key() {
assert_eq!(response["status"], "succeeded");
assert_eq!(response["type"], "indexCreation");
assert_eq!(response["details"]["primaryKey"], json!(null));
assert_eq!(response["details"]["primaryKey"], Value::Null);
}
#[actix_rt::test]
@ -38,7 +38,7 @@ async fn create_index_with_gzip_encoded_request() {
assert_eq!(response["status"], "succeeded");
assert_eq!(response["type"], "indexCreation");
assert_eq!(response["details"]["primaryKey"], json!(null));
assert_eq!(response["details"]["primaryKey"], Value::Null);
}
#[actix_rt::test]
@ -86,7 +86,7 @@ async fn create_index_with_zlib_encoded_request() {
assert_eq!(response["status"], "succeeded");
assert_eq!(response["type"], "indexCreation");
assert_eq!(response["details"]["primaryKey"], json!(null));
assert_eq!(response["details"]["primaryKey"], Value::Null);
}
#[actix_rt::test]
@ -103,7 +103,7 @@ async fn create_index_with_brotli_encoded_request() {
assert_eq!(response["status"], "succeeded");
assert_eq!(response["type"], "indexCreation");
assert_eq!(response["details"]["primaryKey"], json!(null));
assert_eq!(response["details"]["primaryKey"], Value::Null);
}
#[actix_rt::test]
@ -136,7 +136,7 @@ async fn create_index_with_invalid_primary_key() {
let (response, code) = index.get().await;
assert_eq!(code, 200);
assert_eq!(response["primaryKey"], json!(null));
assert_eq!(response["primaryKey"], Value::Null);
}
#[actix_rt::test]

View File

@ -1,5 +1,6 @@
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn create_and_delete_index() {

View File

@ -1,7 +1,7 @@
use meili_snap::*;
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn get_indexes_bad_offset() {

View File

@ -1,5 +1,6 @@
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn stats() {

View File

@ -1,9 +1,9 @@
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::encoder::Encoder;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn update_primary_key() {

View File

@ -1,8 +1,8 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([

View File

@ -1,8 +1,8 @@
use meili_snap::*;
use serde_json::json;
use super::DOCUMENTS;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn search_unexisting_index() {

View File

@ -1,8 +1,8 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([

View File

@ -1,8 +1,8 @@
use insta::{allow_duplicates, assert_json_snapshot};
use serde_json::json;
use super::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn formatted_contain_wildcard() {

View File

@ -1,8 +1,8 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([

View File

@ -11,9 +11,9 @@ mod pagination;
mod restrict_searchable;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
@ -817,7 +817,7 @@ async fn experimental_feature_score_details() {
},
"proximity": {
"order": 2,
"score": 0.75
"score": 0.875
},
"attribute": {
"order": 3,

View File

@ -1,8 +1,8 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use super::{DOCUMENTS, NESTED_DOCUMENTS};
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn search_empty_list() {

View File

@ -1,5 +1,6 @@
use serde_json::json;
use crate::common::Server;
use crate::json;
use crate::search::DOCUMENTS;
#[actix_rt::test]

View File

@ -1,9 +1,9 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");

View File

@ -1,5 +1,6 @@
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn set_and_reset_distinct_attribute() {

View File

@ -1,7 +1,7 @@
use meili_snap::*;
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn settings_bad_displayed_attributes() {

View File

@ -1,16 +1,16 @@
use std::collections::HashMap;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::{Server, Value};
use crate::json;
use crate::common::Server;
static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|| {
let mut map = HashMap::new();
map.insert("displayed_attributes", json!(["*"]));
map.insert("searchable_attributes", json!(["*"]));
map.insert("filterable_attributes", json!([]));
map.insert("distinct_attribute", json!(null));
map.insert("distinct_attribute", json!(Value::Null));
map.insert(
"ranking_rules",
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
@ -229,7 +229,7 @@ macro_rules! test_setting_routes {
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (response, code) = server.service.$write_method(url, serde_json::Value::Null.into()).await;
let (response, code) = server.service.$write_method(url, serde_json::Value::Null).await;
assert_eq!(code, 202, "{}", response);
server.index("").wait_task(0).await;
let (response, code) = server.index("test").get().await;

View File

@ -1,7 +1,7 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn set_and_reset() {

View File

@ -1,13 +1,11 @@
use std::time::Duration;
use actix_rt::time::sleep;
use meili_snap::{json_string, snapshot};
use meilisearch::option::ScheduleSnapshot;
use meilisearch::Opt;
use crate::common::server::default_settings;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
macro_rules! verify_snapshot {
(
@ -46,7 +44,7 @@ async fn perform_snapshot() {
let index = server.index("test");
index
.update_settings(json! ({
.update_settings(serde_json::json! ({
"searchableAttributes": [],
}))
.await;
@ -92,95 +90,3 @@ async fn perform_snapshot() {
server.index("test1").settings(),
);
}
#[actix_rt::test]
async fn perform_on_demand_snapshot() {
let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap();
let options =
Opt { snapshot_dir: snapshot_dir.path().to_owned(), ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let index = server.index("catto");
index
.update_settings(json! ({
"searchableAttributes": [],
}))
.await;
index.load_test_set().await;
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 4,
"indexUid": null,
"status": "enqueued",
"type": "snapshotCreation",
"enqueuedAt": "[date]"
}
"###);
let task = index.wait_task(task.uid()).await;
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 4,
"indexUid": null,
"status": "succeeded",
"type": "snapshotCreation",
"canceledBy": null,
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let temp = tempfile::tempdir().unwrap();
let snapshots: Vec<String> = std::fs::read_dir(&snapshot_dir)
.unwrap()
.map(|entry| entry.unwrap().path().file_name().unwrap().to_str().unwrap().to_string())
.collect();
meili_snap::snapshot!(format!("{snapshots:?}"), @r###"["db.snapshot"]"###);
let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot");
#[cfg_attr(windows, allow(unused))]
let snapshot_meta = std::fs::metadata(&snapshot_path).unwrap();
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mode = snapshot_meta.permissions().mode();
// rwxrwxrwx
meili_snap::snapshot!(format!("{:b}", mode), @"1000000100100100");
}
let options = Opt { import_snapshot: Some(snapshot_path), ..default_settings(temp.path()) };
let snapshot_server = Server::new_with_options(options).await.unwrap();
verify_snapshot!(server, snapshot_server, |server| =>
server.list_indexes(None, None),
// for some reason the db sizes differ. this may be due to the compaction options we have
// set when performing the snapshot
//server.stats(),
// The original instance contains the snapshotCreation task, while the snapshotted-instance does not. For this reason we need to compare the task queue **after** the task 4
server.tasks_filter("?from=2"),
server.index("catto").get_all_documents(GetAllDocumentsOptions::default()),
server.index("catto").settings(),
server.index("doggo").get_all_documents(GetAllDocumentsOptions::default()),
server.index("doggo").settings(),
);
}

View File

@ -1,8 +1,8 @@
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn get_settings_unexisting_index() {

View File

@ -1,7 +1,7 @@
use meili_snap::*;
use serde_json::json;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn swap_indexes_bad_format() {

View File

@ -1,9 +1,9 @@
mod errors;
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
#[actix_rt::test]
async fn swap_indexes() {

View File

@ -1,12 +1,11 @@
mod errors;
mod webhook;
use meili_snap::insta::assert_json_snapshot;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn error_get_unexisting_task_status() {
@ -34,7 +33,7 @@ async fn get_task_status() {
index.create(None).await;
index
.add_documents(
json!([{
serde_json::json!([{
"id": 1,
"content": "foobar",
}]),

View File

@ -1,123 +0,0 @@
//! To test the webhook, we need to spawn a new server with a URL listening for
//! post requests. The webhook handle starts a server and forwards all the
//! received requests into a channel for you to handle.
use std::sync::Arc;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::web::{Bytes, Data};
use actix_web::{post, App, HttpResponse, HttpServer};
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tokio::sync::mpsc;
use url::Url;
use crate::common::{default_settings, Server};
use crate::json;
#[post("/")]
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
let body = body.to_vec();
sender.send(body).unwrap();
HttpResponse::Ok().into()
}
fn create_app(
sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
) -> actix_web::App<
impl ServiceFactory<
actix_web::dev::ServiceRequest,
Config = (),
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
InitError = (),
>,
> {
App::new().service(forward_body).app_data(Data::from(sender))
}
struct WebhookHandle {
pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
pub url: String,
pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
}
async fn create_webhook_server() -> WebhookHandle {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters("info");
log_builder.init();
let (sender, receiver) = mpsc::unbounded_channel();
let sender = Arc::new(sender);
// By listening on the port 0, the system will give us any available port.
let server =
HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
let (ip, scheme) = server.addrs_with_scheme()[0];
let url = format!("{scheme}://{ip}/");
let server_handle = tokio::spawn(server.run());
WebhookHandle { server_handle, url, receiver }
}
#[actix_web::test]
async fn test_basic_webhook() {
let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
task_webhook_url: Some(Url::parse(&url).unwrap()),
..default_settings(db_path.path())
})
.await
.unwrap();
let index = server.index("tamo");
// May be flaky: we're relying on the fact that while the first document addition is processed, the other
// operations will be received and will be batched together. If it doesn't happen it's not a problem
// the rest of the test won't assume anything about the number of tasks per batch.
for i in 0..5 {
let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
}
let mut nb_tasks = 0;
while let Some(payload) = receiver.recv().await {
let payload = String::from_utf8(payload).unwrap();
let jsonl = payload.split('\n');
for json in jsonl {
if json.is_empty() {
break; // we reached EOF
}
nb_tasks += 1;
let json: serde_json::Value = serde_json::from_str(json).unwrap();
snapshot!(
json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": "[uid]",
"indexUid": "tamo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
if nb_tasks == 5 {
break;
}
}
assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
server_handle.abort();
}

View File

@ -1,19 +0,0 @@
[package]
name = "meilitool"
description = "A CLI to edit a Meilisearch database from the command line"
version.workspace = true
authors.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.30", features = ["formatting"] }
uuid = { version = "1.5.0", features = ["v4"], default-features = false }

View File

@ -1,312 +0,0 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::path::PathBuf;
use anyhow::Context;
use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::check_version_file;
use meilisearch_types::Index;
use time::macros::format_description;
use time::OffsetDateTime;
use uuid_codec::UuidCodec;
mod uuid_codec;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// The database path where the Meilisearch is running.
#[arg(long, default_value = "data.ms/")]
db_path: PathBuf,
#[command(subcommand)]
command: Command,
}
#[derive(Subcommand)]
enum Command {
/// Clears the task queue and make it empty.
///
/// This command can be safely executed even if Meilisearch is running and processing tasks.
/// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
/// even the ones that were processing. However, it's highly possible that you see the processing
/// tasks in the queue again with an associated internal error message.
ClearTaskQueue,
/// Exports a dump from the Meilisearch database.
///
/// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
/// If tasks are being processed while a dump is being exported there are chances for the dump to be
/// malformed with missing tasks.
///
/// TODO Verify this claim or make sure it cannot happen and we can export dumps
/// without caring about killing Meilisearch first!
ExportADump {
/// The directory in which the dump will be created.
#[arg(long, default_value = "dumps/")]
dump_dir: PathBuf,
/// Skip dumping the enqueued or processing tasks.
///
/// Can be useful when there are a lot of them and it is not particularly useful
/// to keep them. Note that only the enqueued tasks takes up space so skipping
/// the processed ones is not particularly interesting.
#[arg(long)]
skip_enqueued_tasks: bool,
},
}
fn main() -> anyhow::Result<()> {
let Cli { db_path, command } = Cli::parse();
check_version_file(&db_path).context("While checking the version file")?;
match command {
Command::ClearTaskQueue => clear_task_queue(db_path),
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
}
}
}
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&path)
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
let mut wtxn = env.write_txn()?;
let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
let total = all_tasks.len(&wtxn)?;
let status = try_opening_poly_database(&env, &wtxn, "status")?;
let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
try_clearing_poly_database(&mut wtxn, status, "status")?;
try_clearing_poly_database(&mut wtxn, kind, "kind")?;
try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
wtxn.commit().context("While committing the transaction")?;
eprintln!("Successfully deleted {total} tasks from the tasks database!");
eprintln!("Deleting the content files from disk...");
let mut count = 0usize;
let update_files = db_path.join("update_files");
let entries = read_dir(&update_files).with_context(|| {
format!("While trying to read the content of {:?}", update_files.display())
})?;
for result in entries {
match result {
Ok(ent) => match remove_file(ent.path()) {
Ok(_) => count += 1,
Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
},
Err(e) => {
eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
}
}
}
eprintln!("Sucessfully deleted {count} content files from disk!");
Ok(())
}
fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<Database<KC, DC>> {
env.open_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} database"))?
.with_context(|| format!("Missing the {db_name:?} database"))
}
fn try_opening_poly_database(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<PolyDatabase> {
env.open_poly_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} poly database"))?
.with_context(|| format!("Missing the {db_name:?} poly database"))
}
fn try_clearing_poly_database(
wtxn: &mut RwTxn,
database: PolyDatabase,
db_name: &str,
) -> anyhow::Result<()> {
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
}
/// Exports a dump into the dump directory.
fn export_a_dump(
db_path: PathBuf,
dump_dir: PathBuf,
skip_enqueued_tasks: bool,
) -> Result<(), anyhow::Error> {
let started_at = OffsetDateTime::now_utc();
// 1. Extracts the instance UID from disk
let instance_uid_path = db_path.join("instance-uid");
let instance_uid = match read_to_string(&instance_uid_path) {
Ok(content) => match content.trim().parse() {
Ok(uuid) => Some(uuid),
Err(e) => {
eprintln!("Impossible to parse instance-uid: {e}");
None
}
},
Err(e) => {
eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
None
}
};
let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
let file_store =
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&index_scheduler_path)
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");
// 2. dump the keys
let auth_store = AuthController::new(&db_path, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?;
let mut count = 0;
for key in auth_store.list_keys()? {
dump_keys.push_key(&key)?;
count += 1;
}
dump_keys.flush()?;
eprintln!("Successfully dumped {count} keys!");
let rtxn = env.read_txn()?;
let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
if skip_enqueued_tasks {
eprintln!("Skip dumping the enqueued tasks...");
} else {
eprintln!("Dumping the enqueued tasks...");
// 3. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
let mut count = 0;
for ret in all_tasks.iter(&rtxn)? {
let (_, t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file_uuid) = content_file {
if status == Status::Enqueued {
let content_file = file_store.get_update(content_file_uuid)?;
let reader =
DocumentsBatchReader::from_reader(content_file).with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().with_context(|| {
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;
}
}
}
dump_tasks.flush()?;
eprintln!("Successfully dumped {count} enqueued tasks!");
}
eprintln!("Dumping the indexes...");
// 4. Dump the indexes
let mut count = 0;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 4.2. Dump the settings
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
index_dumper.settings(&settings)?;
count += 1;
}
eprintln!("Successfully dumped {count} indexes!");
// We will not dump experimental feature settings
eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
let path = dump_dir.join(format!("{}.dump", dump_uid));
let file = File::create(&path)?;
dump.persist_to(BufWriter::new(file))?;
eprintln!("Dump exported at path {:?}", path.display());
Ok(())
}

View File

@ -1,24 +0,0 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
}
}

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.5", default-features = false }
charabia = { version = "0.8.3", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
@ -26,8 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.5", default-features = false, features = [
"rayon", "tempfile"
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
"lmdb", "read-txn-no-tls"
@ -79,11 +79,10 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@ -107,6 +106,3 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization
greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]

View File

@ -1,6 +1,5 @@
mod builder;
mod enriched;
mod primary_key;
mod reader;
mod serde_impl;
@ -12,7 +11,6 @@ use bimap::BiHashMap;
pub use builder::DocumentsBatchBuilder;
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
use obkv::KvReader;
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};
@ -89,12 +87,6 @@ impl DocumentsBatchIndex {
}
}
impl FieldIdMapper for DocumentsBatchIndex {
fn id(&self, name: &str) -> Option<FieldId> {
self.id(name)
}
}
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Error parsing number {value:?} at line {line}: {error}")]

View File

@ -1,172 +0,0 @@
use std::iter;
use std::result::Result as StdResult;
use serde_json::Value;
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
/// The default primary that is used when not specified.
pub const DEFAULT_PRIMARY_KEY: &str = "id";
/// Trait for objects that can map the name of a field to its [`FieldId`].
pub trait FieldIdMapper {
/// Attempts to map the passed name to its [`FieldId`].
///
/// `None` if the field with this name was not found.
fn id(&self, name: &str) -> Option<FieldId>;
}
/// A type that represent the type of primary key that has been set
/// for this index, a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
pub enum PrimaryKey<'a> {
Flat { name: &'a str, field_id: FieldId },
Nested { name: &'a str },
}
pub enum DocumentIdExtractionError {
InvalidDocumentId(UserError),
MissingDocumentId,
TooManyDocumentIds(usize),
}
impl<'a> PrimaryKey<'a> {
pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
Self::Nested { name: path }
} else {
let field_id = fields.id(path)?;
Self::Flat { name: path, field_id }
})
}
pub fn name(&self) -> &str {
match self {
PrimaryKey::Flat { name, .. } => name,
PrimaryKey::Nested { name } => name,
}
}
pub fn document_id(
&self,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
Some(document_id_bytes) => {
let document_id = serde_json::from_slice(document_id_bytes)
.map_err(InternalError::SerdeJson)?;
match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
}
}
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
nested @ PrimaryKey::Nested { .. } => {
let mut matching_documents_ids = Vec::new();
for (first_level_name, right) in nested.possible_level_names() {
if let Some(field_id) = fields.id(first_level_name) {
if let Some(value_bytes) = document.get(field_id) {
let object = serde_json::from_slice(value_bytes)
.map_err(InternalError::SerdeJson)?;
fetch_matching_values(object, right, &mut matching_documents_ids);
if matching_documents_ids.len() >= 2 {
return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
matching_documents_ids.len(),
)));
}
}
}
}
match matching_documents_ids.pop() {
Some(document_id) => match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
}
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
}
}
}
}
/// Returns an `Iterator` that gives all the possible fields names the primary key
/// can have depending of the first level name and depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
let name = self.name();
name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
.map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
.chain(iter::once((name, "")))
}
}
fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
match value {
Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
otherwise => output.push(otherwise),
}
}
fn fetch_matching_values_in_object(
object: Object,
selector: &str,
base_key: &str,
output: &mut Vec<Value>,
) {
for (key, value) in object {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
};
if starts_with(selector, &base_key) {
match value {
Value::Object(object) => {
fetch_matching_values_in_object(object, selector, &base_key, output)
}
value => output.push(value),
}
}
}
}
fn starts_with(selector: &str, key: &str) -> bool {
selector.strip_prefix(key).map_or(false, |tail| {
tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
})
}
// FIXME: move to a DocumentId struct
fn validate_document_id(document_id: &str) -> Option<&str> {
if !document_id.is_empty()
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
{
Some(document_id)
} else {
None
}
}
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
match document_id {
Value::String(string) => match validate_document_id(&string) {
Some(s) if s.len() == string.len() => Ok(Ok(string)),
Some(s) => Ok(Ok(s.to_string())),
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
},
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
}
}

View File

@ -89,6 +89,8 @@ pub enum FieldIdMapMissingEntry {
#[derive(Error, Debug)]
pub enum UserError {
#[error("A soft deleted internal document id have been used: `{document_id}`.")]
AccessingSoftDeletedDocument { document_id: DocumentId },
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]

Some files were not shown because too many files have changed in this diff Show More