Compare commits

..

17 Commits

583 changed files with 13578 additions and 39847 deletions

View File

@ -23,8 +23,7 @@ A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Meilisearch version:**
[e.g. v0.20.0]
**Meilisearch version:** [e.g. v0.20.0]
**Additional context**
Additional information that may be relevant to the issue.

View File

@ -1,34 +0,0 @@
---
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
title: ''
labels: ''
assignees: ''
---
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
## TODO
<!---Feel free to adapt this list with more technical/product steps-->
- [ ] Release a prototype
- [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec
## Impacted teams
<!---Ping the related teams. Ask for the engine manager if any hesitation-->

View File

@ -1,41 +1,24 @@
#!/usr/bin/env bash
set -eu -o pipefail
#!/bin/bash
check_tag() {
local expected=$1
local actual=$2
local filename=$3
if [[ $actual != $expected ]]; then
echo >&2 "Error: the current tag does not match the version in $filename: found $actual, expected $expected"
return 1
fi
# check_tag $current_tag $file_tag $file_name
function check_tag {
if [[ "$1" != "$2" ]]; then
echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1"
ret=1
fi
}
read_version() {
grep '^version = ' | cut -d \" -f 2
}
if [[ -z "${GITHUB_REF:-}" ]]; then
echo >&2 "Error: GITHUB_REF is not set"
exit 1
fi
if [[ ! "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+(-[a-z0-9]+)?$ ]]; then
echo >&2 "Error: GITHUB_REF is not a valid tag: $GITHUB_REF"
exit 1
fi
current_tag=${GITHUB_REF#refs/tags/v}
ret=0
current_tag=${GITHUB_REF#'refs/tags/v'}
toml_tag="$(cat Cargo.toml | read_version)"
check_tag "$current_tag" "$toml_tag" Cargo.toml || ret=1
file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag
lock_tag=$(grep -A 1 '^name = "meilisearch-auth"' Cargo.lock | read_version)
check_tag "$current_tag" "$lock_tag" Cargo.lock || ret=1
lock_file='Cargo.lock'
lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')
check_tag $current_tag $lock_tag $lock_file
if (( ret == 0 )); then
echo 'OK'
if [[ "$ret" -eq 0 ]] ; then
echo 'OK'
fi
exit $ret

19
.github/uffizzi/Dockerfile vendored Normal file
View File

@ -0,0 +1,19 @@
# Run
FROM uffizzi/ttyd:alpine
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
ENV MEILI_NO_ANALYTICS true
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
RUN ln -s /bin/meilisearch /meilisearch
WORKDIR /meili_data
EXPOSE 7700/tcp
ENTRYPOINT ["tini", "--"]
CMD ["ttyd", "/bin/zsh"]

View File

@ -0,0 +1,26 @@
version: "3"
x-uffizzi:
ingress:
service: nginx
port: 8081
services:
meilisearch:
image: "${MEILISEARCH_IMAGE}"
restart: unless-stopped
ports:
- "7681:7681"
- "7700:7700"
deploy:
resources:
limits:
memory: 500M
nginx:
image: nginx:alpine
restart: unless-stopped
ports:
- "8081:8081"
volumes:
- ./.github/uffizzi/nginx:/etc/nginx

28
.github/uffizzi/nginx/nginx.conf vendored Normal file
View File

@ -0,0 +1,28 @@
events {
worker_connections 4096; ## Default: 1024
}
http {
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 8081;
location / {
proxy_pass http://localhost:7681;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
location /meilisearch/ {
# rewrite /meilisearch/(.*) /$1 break;
proxy_pass http://localhost:7700/;
}
}
}

View File

@ -1,24 +0,0 @@
name: Run the indexing fuzzer
on:
push:
branches:
- main
jobs:
fuzz:
name: Setup the action
runs-on: ubuntu-latest
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run the fuzzer
run: |
cargo run --release --bin fuzz-indexing

View File

@ -35,7 +35,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

View File

@ -54,7 +54,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@ -87,7 +87,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@ -96,12 +96,14 @@ jobs:
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: macos-12
runs-on: ${{ matrix.os }}
needs: check-version
strategy:
fail-fast: false
matrix:
include:
- target: aarch64-apple-darwin
- os: macos-12
target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
@ -121,7 +123,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@ -130,29 +132,21 @@ jobs:
publish-aarch64:
name: Publish binary for aarch64
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
strategy:
fail-fast: false
matrix:
include:
- target: aarch64-unknown-linux-gnu
- build: aarch64
os: ubuntu-18.04
target: aarch64-unknown-linux-gnu
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update -y && apt upgrade -y
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
- name: Set up Docker for cross compilation
run: |
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
with:
@ -160,7 +154,15 @@ jobs:
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
@ -174,16 +176,14 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: build
use-cross: true
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}
env:
CROSS_DOCKER_IN_DOCKER: true
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@ -1,226 +0,0 @@
# If any test fails, the engine team should ensure the "breaking" changes are expected and contact the integration team
name: SDKs tests
on:
workflow_dispatch:
inputs:
docker_image:
description: 'The Meilisearch Docker image used'
required: false
default: nightly
schedule:
- cron: "0 6 * * MON" # Every Monday at 6:00AM
env:
MEILI_MASTER_KEY: 'masterKey'
MEILI_NO_ANALYTICS: 'true'
jobs:
define-docker-image:
runs-on: ubuntu-latest
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
event=${{ github.event_name }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
meilisearch-js-tests:
needs: define-docker-image
name: JS SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v3
with:
cache: 'yarn'
- name: Install dependencies
run: yarn --dev
- name: Run tests
run: yarn test
- name: Build project
run: yarn build
- name: Run ESM env
run: yarn test:env:esm
- name: Run Node.js env
run: yarn test:env:nodejs
- name: Run node typescript env
run: yarn test:env:node-ts
- name: Run Browser env
run: yarn test:env:browser
instant-meilisearch-tests:
needs: define-docker-image
name: instant-meilisearch tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/instant-meilisearch
- name: Setup node
uses: actions/setup-node@v3
with:
cache: yarn
- name: Install dependencies
run: yarn install
- name: Run tests
run: yarn test
- name: Build all the playgrounds and the packages
run: yarn build
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
uses: shivammathur/setup-php@v2
with:
coverage: none
- name: Validate composer.json and composer.lock
run: composer validate
- name: Install dependencies
run: |
composer remove --dev friendsofphp/php-cs-fixer --no-update --no-interaction
composer update --prefer-dist --no-progress
- name: Run test suite - default HTTP client (Guzzle 7)
run: |
sh scripts/tests.sh
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
meilisearch-python-tests:
needs: define-docker-image
name: Python SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v4
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
run: pipenv install --dev --python=${{ matrix.python-version }}
- name: Test with pytest
run: pipenv run pytest
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
run: |
go get -v -t -d ./...
if [ -f Gopkg.toml ]; then
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
dep ensure
fi
- name: Run integration tests
run: go test -v ./...
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
- name: Install ruby dependencies
run: bundle install --with test
- name: Run test suite
run: bundle exec rspec
meilisearch-rust-tests:
needs: define-docker-image
name: Rust SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose

View File

@ -30,20 +30,20 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Setup test with Rust nightly
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.0
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.0
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -105,29 +105,6 @@ jobs:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
if: github.event_name == 'schedule'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is pressent
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
@ -146,7 +123,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.0
uses: Swatinem/rust-cache@v2.2.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -161,16 +138,17 @@ jobs:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.69.0
toolchain: 1.67.0
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.0
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings
# allow unlined_format_args https://github.com/rust-lang/rust-clippy/issues/10087
args: --all-targets -- --deny warnings --allow clippy::uninlined_format_args
fmt:
name: Run Rustfmt
@ -184,7 +162,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.0
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

120
.github/workflows/uffizzi-build.yml vendored Normal file
View File

@ -0,0 +1,120 @@
name: Uffizzi - Build PR Image
on:
pull_request:
types: [opened,synchronize,reopened,closed]
jobs:
build-meilisearch:
name: Build and push `meilisearch`
runs-on: ubuntu-latest
outputs:
tags: ${{ steps.meta.outputs.tags }}
if: ${{ github.event.action != 'closed' }}
steps:
- name: checkout
uses: actions/checkout@v3
- run: sudo apt-get install musl-tools
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
target: x86_64-unknown-linux-musl
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --target x86_64-unknown-linux-musl --release
- name: Remove dockerignore so we can use the target folder in our docker build
run: rm -f .dockerignore
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Generate UUID image name
id: uuid
run: echo "UUID_TAG=$(uuidgen)" >> $GITHUB_ENV
- name: Docker metadata
id: meta
uses: docker/metadata-action@v4
with:
images: registry.uffizzi.com/${{ env.UUID_TAG }}
tags: |
type=raw,value=60d
- name: Build Image
uses: docker/build-push-action@v4
with:
context: ./
file: .github/uffizzi/Dockerfile
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
render-compose-file:
name: Render Docker Compose File
# Pass output of this workflow to another triggered by `workflow_run` event.
runs-on: ubuntu-latest
needs:
- build-meilisearch
outputs:
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
steps:
- name: Checkout git repo
uses: actions/checkout@v3
- name: Render Compose File
run: |
MEILISEARCH_IMAGE=$(echo ${{ needs.build-meilisearch.outputs.tags }})
export MEILISEARCH_IMAGE
# Render simple template from environment variables.
envsubst < .github/uffizzi/docker-compose.uffizzi.yml > docker-compose.rendered.yml
cat docker-compose.rendered.yml
- name: Upload Rendered Compose File as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: docker-compose.rendered.yml
retention-days: 2
- name: Serialize PR Event to File
run: |
cat << EOF > event.json
${{ toJSON(github.event) }}
EOF
- name: Upload PR Event as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: event.json
retention-days: 2
delete-preview:
name: Call for Preview Deletion
runs-on: ubuntu-latest
if: ${{ github.event.action == 'closed' }}
steps:
# If this PR is closing, we will not render a compose file nor pass it to the next workflow.
- name: Serialize PR Event to File
run: |
cat << EOF > event.json
${{ toJSON(github.event) }}
EOF
- name: Upload PR Event as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: event.json
retention-days: 2

View File

@ -0,0 +1,103 @@
name: Uffizzi - Deploy Preview
on:
workflow_run:
workflows:
- "Uffizzi - Build PR Image"
types:
- completed
jobs:
cache-compose-file:
name: Cache Compose File
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}
outputs:
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
pr-number: ${{ env.PR_NUMBER }}
expected-url: ${{ env.EXPECTED_URL }}
steps:
- name: 'Download artifacts'
# Fetch output (zip archive) from the workflow run that triggered this workflow.
uses: actions/github-script@v6
with:
script: |
let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: context.payload.workflow_run.id,
});
let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
return artifact.name == "preview-spec"
})[0];
let download = await github.rest.actions.downloadArtifact({
owner: context.repo.owner,
repo: context.repo.repo,
artifact_id: matchArtifact.id,
archive_format: 'zip',
});
let fs = require('fs');
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/preview-spec.zip`, Buffer.from(download.data));
- name: 'Unzip artifact'
run: unzip preview-spec.zip
- name: Read Event into ENV
run: |
echo 'EVENT_JSON<<EOF' >> $GITHUB_ENV
cat event.json >> $GITHUB_ENV
echo 'EOF' >> $GITHUB_ENV
- name: Hash Rendered Compose File
id: hash
# If the previous workflow was triggered by a PR close event, we will not have a compose file artifact.
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
run: echo "COMPOSE_FILE_HASH=$(md5sum docker-compose.rendered.yml | awk '{ print $1 }')" >> $GITHUB_ENV
- name: Cache Rendered Compose File
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
uses: actions/cache@v3
with:
path: docker-compose.rendered.yml
key: ${{ env.COMPOSE_FILE_HASH }}
- name: Read PR Number From Event Object
id: pr
run: echo "PR_NUMBER=${{ fromJSON(env.EVENT_JSON).number }}" >> $GITHUB_ENV
- name: DEBUG - Print Job Outputs
if: ${{ runner.debug }}
run: |
echo "PR number: ${{ env.PR_NUMBER }}"
echo "Compose file hash: ${{ env.COMPOSE_FILE_HASH }}"
cat event.json
- name: Add expected URL env var
if: ${{ runner.debug }}
run: |
REPO=$(echo ${{ github.repository }} | sed 's/\./+/g')
echo "EXPECTED_URL=${{ inputs.server }}/github.com/$REPO/pull/${{ env.PR_NUMBER }}" >> $GITHUB_ENV
deploy-uffizzi-preview:
name: Use Remote Workflow to Preview on Uffizzi
needs:
- cache-compose-file
uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
with:
# If this workflow was triggered by a PR close event, cache-key will be an empty string
# and this reusable workflow will delete the preview deployment.
compose-file-cache-key: ${{ needs.cache-compose-file.outputs.compose-file-cache-key }}
compose-file-cache-path: docker-compose.rendered.yml
server: https://app.uffizzi.com
pr-number: ${{ needs.cache-compose-file.outputs.pr-number }}
description: |
The meilisearch preview environment contains a web terminal from where you can run the
`meilisearch` command. You should be able to access this instance of meilisearch running in
the preview from the link Meilisearch Endpoint link given below.
Web Terminal Endpoint : <uffizzi-url>
Meilisearch Endpoint : <uffizzi-url>/meilisearch
permissions:
contents: read
pull-requests: write
id-token: write

View File

@ -18,9 +18,9 @@ If Meilisearch does not offer optimized support for your language, please consid
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
Please use this for help.**
## How to Contribute
@ -120,9 +120,29 @@ The full Meilisearch release process is described in [this guide](https://github
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
This happens in two steps:
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
The prototype name must follow this convention: `prototype-X-Y` where
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
- `Y` is the version of the prototype, starting from `0`.
✅ Example: `prototype-auto-resize-0`. </br>
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
Steps to create a prototype:
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
2. Create a tag following the convention: `git tag prototype-X-Y`
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
3. Push the tag: `git push origin prototype-X-Y`
4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
🐳 Once the CI has finished to run (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
### Release assets

1768
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -9,16 +9,16 @@ members = [
"dump",
"file-store",
"permissive-json-pointer",
"cluster",
"milli",
"filter-parser",
"flatten-serde-json",
"json-depth-checker",
"benchmarks",
"fuzzers",
"benchmarks"
]
[workspace.package]
version = "1.3.0"
version = "1.1.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@ -1,19 +0,0 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
## Profiling the Search Process
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.

View File

@ -1,26 +1,21 @@
<p align="center">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</p>
<h4 align="center">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://docs.meilisearch.com">Documentation</a> |
<a href="https://docs.meilisearch.com/faq/">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
</h4>
<p align="center">
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
</p>
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
@ -28,72 +23,72 @@
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
</a>
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
</a>
</p>
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**
## 📖 Documentation
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience
## ☁️ Meilisearch cloud
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch). Get started with a 14-day free trial! No credit card required.
Let us manage your infrastructure so you can focus on integrating a great search experience. Try [Meilisearch Cloud](https://meilisearch.com/pricing) today.
## 🧰 SDKs & integration tools
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
## ⚙️ Advanced usage
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
## 📊 Telemetry
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
## 📫 Get in touch!
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
@ -102,6 +97,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
Thank you for your support!

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

View File

@ -1,19 +0,0 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'meilisearch'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['localhost:7700']

View File

@ -11,11 +11,11 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
anyhow = "1.0.65"
csv = "1.1.6"
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] }
@ -24,14 +24,14 @@ rand_chacha = "0.3.1"
roaring = "0.10.1"
[build-dependencies]
anyhow = "1.0.70"
bytes = "1.4.0"
anyhow = "1.0.65"
bytes = "1.2.1"
convert_case = "0.6.0"
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.24"
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]
default = ["milli/default"]
[[bench]]
name = "search_songs"
@ -48,3 +48,7 @@ harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "formatting"
harness = false

View File

@ -119,9 +119,9 @@ _[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceansp
### Movies
`movies` is a really small dataset we uses as our example in the [getting started](https://www.meilisearch.com/docs/learn/getting_started/quick_start)
`movies` is a really small dataset we uses as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/)
_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
### All Countries

View File

@ -0,0 +1,67 @@
use std::rc::Rc;
use criterion::{criterion_group, criterion_main};
use milli::tokenizer::TokenizerBuilder;
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
struct Conf<'a> {
name: &'a str,
text: &'a str,
matching_words: MatcherBuilder<'a, Vec<u8>>,
}
fn bench_formatting(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
Conf {
name: "'the door d'",
text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
matching_words: MatcherBuilder::new(MatchingWords::new(vec![
(vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
(vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
(vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
]
).unwrap(), TokenizerBuilder::default().build()),
},
];
let format_options = &[
FormatOptions { highlight: false, crop: None },
FormatOptions { highlight: true, crop: None },
FormatOptions { highlight: false, crop: Some(10) },
FormatOptions { highlight: true, crop: Some(10) },
FormatOptions { highlight: false, crop: Some(20) },
FormatOptions { highlight: true, crop: Some(20) },
];
for option in format_options {
let highlight = if option.highlight { "highlight" } else { "no-highlight" };
let name = match option.crop {
Some(size) => format!("{}-crop({})", highlight, size),
None => format!("{}-no-crop", highlight),
};
let mut group = c.benchmark_group(&name);
for conf in confs {
group.bench_function(conf.name, |b| {
b.iter(|| {
let mut matcher = conf.matching_words.build(conf.text);
matcher.format(*option);
})
});
}
group.finish();
}
}
criterion_group!(benches, bench_formatting);
criterion_main!(benches);

25
cluster/Cargo.toml Normal file
View File

@ -0,0 +1,25 @@
[package]
name = "cluster"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
ductile = "0.3.0"
serde = { version = "1.0.155", features = ["derive"] }
serde_json = "1.0.94"
thiserror = "1.0.39"
meilisearch-types = { path = "../meilisearch-types" }
roaring = { version = "0.10.1", features = ["serde"] }
log = "0.4.17"
crossbeam = "0.8.2"
bus = "2.3.0"
time = "0.3.20"
uuid = { version = "1.3.0", features = ["v4"] }
synchronoise = "1.0.1"

148
cluster/src/batch.rs Normal file
View File

@ -0,0 +1,148 @@
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::TaskId;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
/// Represents a combination of tasks that can all be processed at the same time.
///
/// A batch contains the set of tasks that it represents (accessible through
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
/// be processed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Batch {
TaskCancelation {
/// The task cancelation itself.
task: TaskId,
/// The date and time at which the previously processing tasks started.
previous_started_at: OffsetDateTime,
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
},
TaskDeletion(TaskId),
SnapshotCreation(Vec<TaskId>),
Dump(TaskId),
IndexOperation {
op: IndexOperation,
must_create_index: bool,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
task: TaskId,
},
IndexUpdate {
index_uid: String,
primary_key: Option<String>,
task: TaskId,
},
IndexDeletion {
index_uid: String,
tasks: Vec<TaskId>,
index_has_been_created: bool,
},
IndexSwap {
task: TaskId,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexOperation {
DocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
tasks: Vec<TaskId>,
},
DocumentDeletion {
index_uid: String,
// The vec associated with each document deletion tasks.
documents: Vec<Vec<String>>,
tasks: Vec<TaskId>,
},
DocumentClear {
index_uid: String,
tasks: Vec<TaskId>,
},
Settings {
index_uid: String,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
tasks: Vec<TaskId>,
},
DocumentClearAndSetting {
index_uid: String,
cleared_tasks: Vec<TaskId>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<TaskId>,
},
SettingsAndDocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
document_import_tasks: Vec<TaskId>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<TaskId>,
},
}
impl Batch {
pub fn ids(&self) -> impl Iterator<Item = TaskId> {
type Ret = Box<dyn Iterator<Item = TaskId>>;
match self {
Batch::TaskCancelation { task, .. } => Box::new(std::iter::once(*task)) as Ret,
Batch::TaskDeletion(task) => Box::new(std::iter::once(*task)) as Ret,
Batch::SnapshotCreation(tasks) => Box::new(tasks.clone().into_iter()) as Ret,
Batch::Dump(task) => Box::new(std::iter::once(*task)) as Ret,
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. } => {
Box::new(tasks.clone().into_iter()) as Ret
}
IndexOperation::DocumentDeletion { tasks, .. } => {
Box::new(tasks.clone().into_iter()) as Ret
}
IndexOperation::DocumentClear { tasks, .. } => {
Box::new(tasks.clone().into_iter()) as Ret
}
IndexOperation::Settings { tasks, .. } => {
Box::new(tasks.clone().into_iter()) as Ret
}
IndexOperation::DocumentClearAndSetting {
cleared_tasks, settings_tasks, ..
} => {
Box::new(cleared_tasks.clone().into_iter().chain(settings_tasks.clone())) as Ret
}
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks,
settings_tasks,
..
} => Box::new(
document_import_tasks.clone().into_iter().chain(settings_tasks.clone()),
) as Ret,
},
Batch::IndexCreation { task, .. } => Box::new(std::iter::once(*task)) as Ret,
Batch::IndexUpdate { task, .. } => Box::new(std::iter::once(*task)) as Ret,
Batch::IndexDeletion { tasks, .. } => Box::new(tasks.clone().into_iter()) as Ret,
Batch::IndexSwap { task } => Box::new(std::iter::once(*task)) as Ret,
}
}
}

276
cluster/src/leader.rs Normal file
View File

@ -0,0 +1,276 @@
use std::net::ToSocketAddrs;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{atomic, Arc, Mutex, RwLock};
use std::time::Duration;
use bus::{Bus, BusReader};
use crossbeam::channel::{unbounded, Receiver, Sender};
use ductile::{ChannelReceiver, ChannelSender, ChannelServer};
use log::{info, warn};
use meilisearch_types::keys::Key;
use meilisearch_types::tasks::Task;
use synchronoise::SignalEvent;
use uuid::Uuid;
use crate::batch::Batch;
use crate::{ApiKeyOperation, Consistency, FollowerMsg, LeaderMsg};
#[derive(Clone)]
pub struct Leader {
task_ready_to_commit: Receiver<u32>,
broadcast_to_follower: Sender<LeaderMsg>,
needs_key_sender: Sender<Sender<Vec<Key>>>,
needs_key_receiver: Receiver<Sender<Vec<Key>>>,
pub wake_up: Arc<SignalEvent>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
batch_id: Arc<RwLock<u32>>,
}
impl Leader {
pub fn new(
listen_on: impl ToSocketAddrs + Send + 'static,
master_key: Option<String>,
) -> Leader {
let new_followers = Arc::new(AtomicUsize::new(0));
let active_followers = Arc::new(AtomicUsize::new(1));
let wake_up = Arc::new(SignalEvent::auto(true));
let (broadcast_to_follower, process_batch_receiver) = unbounded();
let (task_finished_sender, task_finished_receiver) = unbounded();
let (needs_key_sender, needs_key_receiver) = unbounded();
let nf = new_followers.clone();
let af = active_followers.clone();
let wu = wake_up.clone();
std::thread::spawn(move || {
Self::listener(
listen_on,
master_key,
nf,
af,
wu,
process_batch_receiver,
task_finished_sender,
)
});
Leader {
task_ready_to_commit: task_finished_receiver,
broadcast_to_follower,
needs_key_sender,
needs_key_receiver,
wake_up,
new_followers,
active_followers,
batch_id: Arc::default(),
}
}
pub fn has_new_followers(&self) -> bool {
self.new_followers.load(Ordering::Relaxed) != 0
}
/// Takes all the necessary channels to chat with the scheduler and give them
/// to each new followers
fn listener(
listen_on: impl ToSocketAddrs,
master_key: Option<String>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
wake_up: Arc<SignalEvent>,
broadcast_to_follower: Receiver<LeaderMsg>,
task_finished: Sender<u32>,
) {
let listener: ChannelServer<LeaderMsg, FollowerMsg> = if let Some(ref master_key) =
master_key
{
let mut enc = [0; 32];
let master_key = master_key.as_bytes();
if master_key.len() < 32 {
warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
}
enc.iter_mut().zip(master_key).for_each(|(enc, mk)| *enc = *mk);
info!("Listening with encryption enabled");
ChannelServer::bind_with_enc(listen_on, enc).unwrap()
} else {
ChannelServer::bind(listen_on).unwrap()
};
info!("Ready to the receive connections");
// We're going to broadcast all the batches to all our follower
let bus: Bus<LeaderMsg> = Bus::new(10);
let bus = Arc::new(Mutex::new(bus));
let b = bus.clone();
std::thread::spawn(move || loop {
let msg = broadcast_to_follower.recv().expect("Main thread is dead");
b.lock().unwrap().broadcast(msg);
});
for (sender, receiver, _addr) in listener {
let task_finished = task_finished.clone();
let nf = new_followers.clone();
let af = active_followers.clone();
let wu = wake_up.clone();
let process_batch = bus.lock().unwrap().add_rx();
std::thread::spawn(move || {
Self::follower(sender, receiver, nf, af, wu, process_batch, task_finished)
});
}
}
/// Allow a follower to chat with the scheduler
fn follower(
sender: ChannelSender<LeaderMsg>,
receiver: ChannelReceiver<FollowerMsg>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
wake_up: Arc<SignalEvent>,
mut broadcast_to_follower: BusReader<LeaderMsg>,
task_finished: Sender<u32>,
) {
let size = new_followers.fetch_add(1, Ordering::Relaxed) + 1;
wake_up.signal();
info!("A new follower joined the cluster. {} members.", size);
loop {
if let msg @ LeaderMsg::JoinFromDump(_) =
broadcast_to_follower.recv().expect("Main thread died")
{
// we exit the new_follower state and become an active follower even though
// the dump will takes some time to index
new_followers.fetch_sub(1, Ordering::Relaxed);
let size = active_followers.fetch_add(1, Ordering::Relaxed) + 1;
info!("A new follower became active. {} active members.", size);
sender.send(msg).unwrap();
break;
}
}
// send messages to the follower
std::thread::spawn(move || loop {
let msg = broadcast_to_follower.recv().expect("Main thread died");
match msg {
LeaderMsg::JoinFromDump(_) => (),
msg => {
if sender.send(msg).is_err() {
// the follower died, the logging and cluster size update should be done
// in the other thread
break;
}
}
}
});
// receive messages from the follower
loop {
match receiver.recv() {
Err(_) => break,
Ok(msg) => match msg {
FollowerMsg::ReadyToCommit(id) => {
task_finished.send(id).expect("Can't reach the main thread")
}
FollowerMsg::RegisterNewTask(_) => todo!(),
},
}
}
// if we exited from the previous loop it means the follower is down and should
// be removed from the cluster
let size = active_followers.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
info!("A follower left the cluster. {} members.", size);
}
// ============= Everything related to the setup of the cluster
pub fn join_me(&self, dump: Vec<u8>) {
self.broadcast_to_follower
.send(LeaderMsg::JoinFromDump(dump))
.expect("Lost the link with the followers");
}
// ============= Everything related to the scheduler
pub fn starts_batch(&self, batch: Batch) {
let mut batch_id = self.batch_id.write().unwrap();
info!("Send the batch to process to the followers");
*batch_id += 1;
self.broadcast_to_follower
.send(LeaderMsg::StartBatch { id: *batch_id, batch })
.expect("Can't reach the cluster");
}
pub fn commit(&self, consistency_level: Consistency) {
info!("Wait until enough followers are ready to commit a batch");
let batch_id = self.batch_id.write().unwrap();
let mut nodes_ready_to_commit = 1;
loop {
let size = self.active_followers.load(atomic::Ordering::Relaxed);
info!("{nodes_ready_to_commit} nodes are ready to commit for a cluster size of {size}");
let all = nodes_ready_to_commit == size;
match consistency_level {
Consistency::One if nodes_ready_to_commit >= 1 || all => break,
Consistency::Two if nodes_ready_to_commit >= 2 || all => break,
Consistency::Quorum if nodes_ready_to_commit >= (size / 2) || all => break,
Consistency::All if all => break,
_ => (),
}
// we can't wait forever here because if a node dies the cluster size might get updated while we're stuck
match self.task_ready_to_commit.recv_timeout(Duration::new(1, 0)) {
Ok(id) if id == *batch_id => nodes_ready_to_commit += 1,
_ => continue,
};
}
info!("Tells all the follower to commit");
self.broadcast_to_follower.send(LeaderMsg::Commit(*batch_id)).unwrap();
}
pub fn register_new_task(&self, task: Task, update_file: Option<Vec<u8>>) {
info!("Tells all the follower to register a new task");
self.broadcast_to_follower
.send(LeaderMsg::RegisterNewTask { task, update_file })
.expect("Main thread is dead");
}
// ============= Everything related to the api-keys
pub fn insert_key(&self, key: Key) {
self.broadcast_to_follower
.send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Insert(key)))
.unwrap()
}
pub fn delete_key(&self, uuid: Uuid) {
self.broadcast_to_follower
.send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Delete(uuid)))
.unwrap()
}
pub fn needs_keys(&self) -> Sender<Vec<Key>> {
self.needs_key_receiver.recv().expect("The cluster is dead")
}
pub fn get_keys(&self) -> Vec<Key> {
let (send, rcv) = crossbeam::channel::bounded(1);
self.needs_key_sender.send(send).expect("The cluster is dead");
rcv.recv().expect("The auth controller is dead")
}
}

231
cluster/src/lib.rs Normal file
View File

@ -0,0 +1,231 @@
use std::net::ToSocketAddrs;
use std::str::FromStr;
use std::sync::{Arc, RwLock};
use batch::Batch;
use crossbeam::channel::{unbounded, Receiver, Sender};
use ductile::{connect_channel, connect_channel_with_enc, ChannelReceiver, ChannelSender};
use log::{info, warn};
use meilisearch_types::keys::Key;
use meilisearch_types::tasks::{KindWithContent, Task};
use serde::{Deserialize, Serialize};
pub mod batch;
mod leader;
pub use leader::Leader;
use uuid::Uuid;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Network issue occured")]
NetworkIssue,
#[error("Internal error:{0}")]
SerdeJson(#[from] serde_json::Error),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LeaderMsg {
/// A dump to join the cluster
JoinFromDump(Vec<u8>),
/// Starts a new batch
StartBatch { id: u32, batch: Batch },
///Tell the follower to commit the update asap
Commit(u32),
///Tell the follower to commit the update asap
RegisterNewTask { task: Task, update_file: Option<Vec<u8>> },
///Tell the follower to commit the update asap
ApiKeyOperation(ApiKeyOperation),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FollowerMsg {
// Let the leader knows you're ready to commit
ReadyToCommit(u32),
RegisterNewTask(KindWithContent),
}
#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Consistency {
One,
Two,
Quorum,
#[default]
All,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ApiKeyOperation {
Insert(Key),
Delete(Uuid),
}
impl std::fmt::Display for Consistency {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Consistency::One => write!(f, "one"),
Consistency::Two => write!(f, "two"),
Consistency::Quorum => write!(f, "quorum"),
Consistency::All => write!(f, "all"),
}
}
}
impl FromStr for Consistency {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"one" => Ok(Consistency::One),
"two" => Ok(Consistency::Two),
"quorum" => Ok(Consistency::Quorum),
"all" => Ok(Consistency::All),
s => Err(format!(
"Unexpected value `{s}`, expected one of `one`, `two`, `quorum`, `all`"
)),
}
}
}
#[derive(Clone)]
pub enum Cluster {
Leader(Leader),
Follower(Follower),
}
#[derive(Clone)]
pub struct Follower {
sender: ChannelSender<FollowerMsg>,
get_batch: Receiver<(u32, Batch)>,
must_commit: Receiver<u32>,
register_new_task: Receiver<(Task, Option<Vec<u8>>)>,
api_key_op: Receiver<ApiKeyOperation>,
batch_id: Arc<RwLock<u32>>,
}
impl Follower {
pub fn join(leader: impl ToSocketAddrs, master_key: Option<String>) -> (Follower, Vec<u8>) {
let (sender, receiver) = if let Some(master_key) = master_key {
let mut enc = [0; 32];
let master_key = master_key.as_bytes();
if master_key.len() < 32 {
warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
}
enc.iter_mut().zip(master_key).for_each(|(enc, mk)| *enc = *mk);
info!("Connecting with encryption enabled");
connect_channel_with_enc(leader, &enc).unwrap()
} else {
connect_channel(leader).unwrap()
};
info!("Connection to the leader established");
info!("Waiting for the leader to contact us");
let state = receiver.recv().unwrap();
let dump = match state {
LeaderMsg::JoinFromDump(dump) => dump,
msg => panic!("Received unexpected message {msg:?}"),
};
let (get_batch_sender, get_batch_receiver) = unbounded();
let (must_commit_sender, must_commit_receiver) = unbounded();
let (register_task_sender, register_task_receiver) = unbounded();
let (create_api_key_sender, create_api_key_receiver) = unbounded();
std::thread::spawn(move || {
Self::router(
receiver,
get_batch_sender,
must_commit_sender,
register_task_sender,
create_api_key_sender,
);
});
(
Follower {
sender,
get_batch: get_batch_receiver,
must_commit: must_commit_receiver,
register_new_task: register_task_receiver,
api_key_op: create_api_key_receiver,
batch_id: Arc::default(),
},
dump,
)
}
fn router(
receiver: ChannelReceiver<LeaderMsg>,
get_batch: Sender<(u32, Batch)>,
must_commit: Sender<u32>,
register_new_task: Sender<(Task, Option<Vec<u8>>)>,
api_key_op: Sender<ApiKeyOperation>,
) {
loop {
match receiver.recv().expect("Lost connection to the leader") {
LeaderMsg::JoinFromDump(_) => {
warn!("Received a join from dump msg but Im already running : ignoring the message")
}
LeaderMsg::StartBatch { id, batch } => {
info!("Starting to process a new batch");
get_batch.send((id, batch)).expect("Lost connection to the main thread")
}
LeaderMsg::Commit(id) => {
info!("Must commit");
must_commit.send(id).expect("Lost connection to the main thread")
}
LeaderMsg::RegisterNewTask { task, update_file } => {
info!("Registered a new task");
register_new_task
.send((task, update_file))
.expect("Lost connection to the main thread")
}
LeaderMsg::ApiKeyOperation(key) => {
api_key_op.send(key).expect("Lost connection to the main thread")
}
}
}
}
pub fn get_new_batch(&self) -> Batch {
info!("Get new batch called");
let (id, batch) = self.get_batch.recv().expect("Lost connection to the leader");
info!("Got a new batch");
*self.batch_id.write().unwrap() = id;
batch
}
pub fn ready_to_commit(&self) {
info!("I'm ready to commit");
let batch_id = self.batch_id.read().unwrap();
self.sender.send(FollowerMsg::ReadyToCommit(*batch_id)).unwrap();
loop {
let id = self.must_commit.recv().expect("Lost connection to the leader");
#[allow(clippy::comparison_chain)]
if id == *batch_id {
break;
} else if id > *batch_id {
panic!("We missed a batch");
}
}
info!("I got the right to commit");
}
pub fn get_new_task(&self) -> (Task, Option<Vec<u8>>) {
self.register_new_task.recv().expect("Lost connection to the leader")
}
pub fn api_key_operation(&self) -> ApiKeyOperation {
info!("Creating a new api key");
self.api_key_op.recv().expect("Lost connection to the leader")
}
}

View File

@ -1,131 +1,130 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
# Designates the location where database files will be created and retrieved.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
db_path = "./data.ms"
# Designates the location where database files will be created and retrieved.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
# Configures the instance's environment. Value must be either `production` or `development`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
env = "development"
# Configures the instance's environment. Value must be either `production` or `development`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
# The address on which the HTTP server will listen.
http_addr = "localhost:7700"
# The address on which the HTTP server will listen.
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
# master_key = "YOUR_MASTER_KEY_VALUE"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
# no_analytics = true
# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
# no_analytics = true
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
# Sets the maximum size of accepted payloads.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
http_payload_size_limit = "100 MB"
# Sets the maximum size of accepted payloads.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
log_level = "INFO"
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
# max_indexing_threads = 4
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
#############
### DUMPS ###
#############
# Sets the directory where Meilisearch will create dump files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_missing_dump = false
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
ignore_dump_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
#################
### SNAPSHOTS ###
#################
schedule_snapshot = false
# Enables scheduled snapshots when true, disable when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
schedule_snapshot = false
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
# Sets the directory where Meilisearch will store snapshots.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_missing_snapshot = false
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
ignore_snapshot_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
###########
### SSL ###
###########
# Enables client authentication in the specified path.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_auth_path = "./path/to/root"
# Enables client authentication in the specified path.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
# Sets the server's SSL certificates.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_cert_path = "./path/to/certfile"
# Sets the server's SSL certificates.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
# Sets the server's SSL key files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_key_path = "./path/to/private-key"
# Sets the server's SSL key files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
# Sets the server's OCSP file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
# ssl_ocsp_path = "./path/to/ocsp-file"
# Sets the server's OCSP file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
# Makes SSL authentication mandatory.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_require_auth = false
# Makes SSL authentication mandatory.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
# Activates SSL session resumption.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_resumption = false
# Activates SSL session resumption.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
# Activates SSL tickets.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
ssl_tickets = false
# Activates SSL tickets.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
#############################
### Experimental features ###
#############################
experimental_enable_metrics = false
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false

View File

@ -103,7 +103,7 @@ not_available_failure_usage() {
printf "$RED%s\n$DEFAULT" 'ERROR: Meilisearch binary is not available for your OS distribution or your architecture yet.'
echo ''
echo 'However, you can easily compile the binary from the source files.'
echo 'Follow the steps at the page ("Source" tab): https://www.meilisearch.com/docs/learn/getting_started/installation'
echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
}
fetch_release_failure_usage() {

View File

@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
anyhow = "1.0.65"
flate2 = "1.0.22"
http = "0.2.8"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
once_cell = "1.15.0"
regex = "1.6.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -101,9 +101,6 @@ pub enum KindDump {
documents_ids: Vec<String>,
},
DocumentClear,
DocumentDeletionByFilter {
filter: serde_json::Value,
},
Settings {
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
is_deletion: bool,
@ -169,9 +166,6 @@ impl From<KindWithContent> for KindDump {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
KindDump::DocumentDeletionByFilter { filter: filter_expr }
}
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
KindWithContent::SettingsUpdate {
new_settings,
@ -208,13 +202,12 @@ pub(crate) mod test {
use std::str::FromStr;
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use maplit::btreeset;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
@ -261,18 +254,10 @@ pub(crate) mod test {
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),
sort_facet_values_by: Setting::Set(
btreemap! { S("age") => FacetValuesSort::Count },
),
}),
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: std::marker::PhantomData,
};
@ -421,8 +406,6 @@ pub(crate) mod test {
}
keys.flush().unwrap();
// ========== TODO: create features here
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();

View File

@ -25,6 +25,7 @@ impl CompatV2ToV3 {
CompatV2ToV3::Compat(compat) => compat.index_uuid(),
};
v2_uuids
.into_iter()
.into_iter()
.map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
.collect()

View File

@ -191,10 +191,6 @@ impl CompatV5ToV6 {
})
})))
}
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
Ok(None)
}
}
pub enum CompatIndexV5ToV6 {
@ -340,9 +336,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
}
},
stop_words: settings.stop_words.into(),
non_separator_tokens: v6::Setting::NotSet,
separator_tokens: v6::Setting::NotSet,
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: match settings.typo_tolerance {
@ -365,7 +358,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
faceting: match settings.faceting {
v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings {
max_values_per_facet: faceting.max_values_per_facet.into(),
sort_facet_values_by: v6::Setting::NotSet,
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,

View File

@ -107,13 +107,6 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.keys(),
}
}
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
match self {
DumpReader::Current(current) => Ok(current.features()),
DumpReader::Compat(compat) => compat.features(),
}
}
}
impl From<V6Reader> for DumpReader {
@ -196,8 +189,6 @@ pub(crate) mod test {
use super::*;
// TODO: add `features` to tests
#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();

View File

@ -2,7 +2,6 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
@ -19,7 +18,6 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@ -49,7 +47,6 @@ pub struct V6Reader {
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
}
impl V6Reader {
@ -61,29 +58,11 @@ impl V6Reader {
Err(e) => return Err(e.into()),
};
let feature_file = match fs::read(dump.path().join("experimental-features.json")) {
Ok(feature_file) => Some(feature_file),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
debug!("`experimental-features.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let features = if let Some(feature_file) = feature_file {
Some(serde_json::from_reader(&*feature_file)?)
} else {
None
};
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
dump,
})
}
@ -150,10 +129,6 @@ impl V6Reader {
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
}
pub struct UpdateFile {

View File

@ -4,7 +4,6 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
@ -54,13 +53,6 @@ impl DumpWriter {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("experimental-features.json"),
serde_json::to_string(&features)?,
)?)
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);

View File

@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
tempfile = "3.3.0"
thiserror = "1.0.30"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.9"
faux = "0.1.8"

View File

@ -12,8 +12,8 @@ edition.workspace = true
license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.1.0"
nom = "7.1.1"
nom_locate = "4.0.0"
[dev-dependencies]
insta = "1.29.0"
insta = "1.21.0"

View File

@ -20,8 +20,6 @@ pub enum Condition<'a> {
GreaterThanOrEqual(Token<'a>),
Equal(Token<'a>),
NotEqual(Token<'a>),
Null,
Empty,
Exists,
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
@ -46,38 +44,6 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
Ok((input, condition))
}
/// null = value "IS" WS+ "NULL"
pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
Ok((input, FilterCondition::Condition { fid: key, op: Null }))
}
/// null = value "IS" WS+ "NOT" WS+ "NULL"
pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
}
/// empty = value "IS" WS+ "EMPTY"
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
}
/// empty = value "IS" WS+ "NOT" WS+ "EMPTY"
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
}
/// exist = value "EXISTS"
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;

View File

@ -143,9 +143,11 @@ impl<'a> Display for Error<'a> {
ErrorKind::MissingClosingDelimiter(c) => {
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
}
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
@ -157,7 +159,7 @@ impl<'a> Display for Error<'a> {
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
}
ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
}
ErrorKind::MisusedGeoRadius => {
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?

View File

@ -47,10 +47,7 @@ mod value;
use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition};
use condition::{
parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
parse_not_exists,
};
use condition::{parse_exists, parse_not_exists};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
use nom::branch::alt;
@ -144,7 +141,7 @@ pub enum FilterCondition<'a> {
Or(Vec<Self>),
And(Vec<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
GeoBoundingBox { top_left_point: [Token<'a>; 2], bottom_right_point: [Token<'a>; 2] },
}
impl<'a> FilterCondition<'a> {
@ -365,8 +362,8 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
}
let res = FilterCondition::GeoBoundingBox {
top_right_point: [args[0][0].into(), args[0][1].into()],
bottom_left_point: [args[1][0].into(), args[1][1].into()],
top_left_point: [args[0][0].into(), args[0][1].into()],
bottom_right_point: [args[1][0].into(), args[1][1].into()],
};
Ok((input, res))
}
@ -385,34 +382,6 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoDistance but not after
tuple((
multispace0,
tag("_geoDistance"),
// if we were able to parse `_geoDistance` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
}
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
fn parse_geo(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geo but not after
tuple((
multispace0,
word_exact("_geo"),
// if we were able to parse `_geo` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
}
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
match parse_condition(input) {
Ok(result) => Ok(result),
@ -445,16 +414,10 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_in,
parse_not_in,
parse_condition,
parse_is_null,
parse_is_not_null,
parse_is_empty,
parse_is_not_empty,
parse_exists,
parse_not_exists,
parse_to,
// the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo,
parse_geo_distance,
parse_geo_point,
parse_error_reserved_keyword,
))(input)
@ -533,30 +496,14 @@ pub mod tests {
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
// Test NOT
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test NULL + NOT NULL
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
// Test EMPTY + NOT EMPTY
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
// Test EXISTS + NOT EXITS
// Test NOT + EXISTS
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test nested NOT
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
@ -629,7 +576,7 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
@ -639,12 +586,12 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
@ -674,35 +621,15 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14)
@ -729,12 +656,12 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
@ -795,39 +722,6 @@ pub mod tests {
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}
#[test]
@ -886,10 +780,7 @@ impl<'a> std::fmt::Display for FilterCondition<'a> {
FilterCondition::GeoLowerThan { point, radius } => {
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
}
FilterCondition::GeoBoundingBox {
top_right_point: top_left_point,
bottom_left_point: bottom_right_point,
} => {
FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => {
write!(
f,
"_geoBoundingBox([{}, {}], [{}, {}])",
@ -909,8 +800,6 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
Condition::Equal(token) => write!(f, "= {token}"),
Condition::NotEqual(token) => write!(f, "!= {token}"),
Condition::Null => write!(f, "IS NULL"),
Condition::Empty => write!(f, "IS EMPTY"),
Condition::Exists => write!(f, "EXISTS"),
Condition::LowerThan(token) => write!(f, "< {token}"),
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),

View File

@ -7,8 +7,8 @@ use nom::{InputIter, InputLength, InputTake, Slice};
use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{
parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
Error, ErrorKind, IResult, Span, Token,
parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span,
Token,
};
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
@ -88,16 +88,11 @@ pub fn parse_value(input: Span) -> IResult<Token> {
// then, we want to check if the user is misusing a geo expression
// This expression cant finish without error.
// We want to return an error in case of failure.
let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];
for parser in geo_reserved_parse_functions {
if let Err(err) = parser(input) {
if err.is_failure() {
return Err(err);
}
if let Err(err) = parse_geo_point(input) {
if err.is_failure() {
return Err(err);
}
}
match parse_geo_radius(input) {
Ok(_) => {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
@ -183,20 +178,7 @@ fn is_syntax_component(c: char) -> bool {
}
fn is_keyword(s: &str) -> bool {
matches!(
s,
"AND"
| "OR"
| "IN"
| "NOT"
| "TO"
| "EXISTS"
| "IS"
| "NULL"
| "EMPTY"
| "_geoRadius"
| "_geoBoundingBox"
)
matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
}
#[cfg(test)]

View File

@ -4,56 +4,51 @@ use serde_json::{Map, Value};
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
let mut obj = Map::new();
let mut all_entries = vec![];
insert_object(&mut obj, None, json, &mut all_entries);
for (key, old_val) in all_entries {
obj.entry(key).or_insert(old_val.clone());
let mut all_keys = vec![];
insert_object(&mut obj, None, json, &mut all_keys);
for key in all_keys {
obj.entry(key).or_insert(Value::Array(vec![]));
}
obj
}
fn insert_object<'a>(
fn insert_object(
base_json: &mut Map<String, Value>,
base_key: Option<&str>,
object: &'a Map<String, Value>,
all_entries: &mut Vec<(String, &'a Value)>,
object: &Map<String, Value>,
all_keys: &mut Vec<String>,
) {
for (key, value) in object {
let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
all_entries.push((new_key.clone(), value));
all_keys.push(new_key.clone());
if let Some(array) = value.as_array() {
insert_array(base_json, &new_key, array, all_entries);
insert_array(base_json, &new_key, array, all_keys);
} else if let Some(object) = value.as_object() {
insert_object(base_json, Some(&new_key), object, all_entries);
insert_object(base_json, Some(&new_key), object, all_keys);
} else {
insert_value(base_json, &new_key, value.clone(), false);
insert_value(base_json, &new_key, value.clone());
}
}
}
fn insert_array<'a>(
fn insert_array(
base_json: &mut Map<String, Value>,
base_key: &str,
array: &'a Vec<Value>,
all_entries: &mut Vec<(String, &'a Value)>,
array: &Vec<Value>,
all_keys: &mut Vec<String>,
) {
for value in array {
if let Some(object) = value.as_object() {
insert_object(base_json, Some(base_key), object, all_entries);
insert_object(base_json, Some(base_key), object, all_keys);
} else if let Some(sub_array) = value.as_array() {
insert_array(base_json, base_key, sub_array, all_entries);
insert_array(base_json, base_key, sub_array, all_keys);
} else {
insert_value(base_json, base_key, value.clone(), true);
insert_value(base_json, base_key, value.clone());
}
}
}
fn insert_value(
base_json: &mut Map<String, Value>,
key: &str,
to_insert: Value,
came_from_array: bool,
) {
fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
debug_assert!(!to_insert.is_object());
debug_assert!(!to_insert.is_array());
@ -68,8 +63,6 @@ fn insert_value(
base_json[key] = Value::Array(vec![value, to_insert]);
}
// if it does not exist we can push the value untouched
} else if came_from_array {
base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
} else {
base_json.insert(key.to_string(), to_insert);
}
@ -120,11 +113,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": {
"b": "c",
"d": "e",
"f": "g"
},
"a": [],
"a.b": "c",
"a.d": "e",
"a.f": "g"
@ -175,7 +164,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": [42],
"a": 42,
"a.b": ["c", "d", "e"],
})
.as_object()
@ -197,7 +186,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": [null],
"a": null,
"a.b": ["c", "d", "e"],
})
.as_object()
@ -219,9 +208,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": {
"b": "c"
},
"a": [],
"a.b": ["c", "d"],
})
.as_object()
@ -247,7 +234,7 @@ mod tests {
json!({
"a.b": ["c", "d", "f"],
"a.c": "e",
"a": [35],
"a": 35,
})
.as_object()
.unwrap()
@ -315,53 +302,4 @@ mod tests {
.unwrap()
);
}
#[test]
fn flatten_nested_values_keep_original_values() {
let mut base: Value = json!({
"tags": {
"t1": "v1"
},
"prices": {
"p1": [null],
"p1000": {"tamo": {"le": {}}}
},
"kiki": [[]]
});
let json = std::mem::take(base.as_object_mut().unwrap());
let flat = flatten(&json);
println!("{}", serde_json::to_string_pretty(&flat).unwrap());
assert_eq!(
&flat,
json!({
"prices": {
"p1": [null],
"p1000": {
"tamo": {
"le": {}
}
}
},
"prices.p1": [null],
"prices.p1000": {
"tamo": {
"le": {}
}
},
"prices.p1000.tamo": {
"le": {}
},
"prices.p1000.tamo.le": {},
"tags": {
"t1": "v1"
},
"tags.t1": "v1",
"kiki": [[]]
})
.as_object()
.unwrap()
);
}
}

View File

@ -1,20 +0,0 @@
[package]
name = "fuzzers"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.3.0", features = ["derive"] }
fastrand = "1.9.0"
milli = { path = "../milli" }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.5.0"

View File

@ -1,3 +0,0 @@
# Fuzzers
The purpose of this crate is to contains all the handmade "fuzzer" we may need.

View File

@ -1,152 +0,0 @@
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::time::Duration;
use arbitrary::{Arbitrary, Unstructured};
use clap::Parser;
use fuzzers::Operation;
use milli::heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
use milli::Index;
use tempfile::TempDir;
#[derive(Debug, Arbitrary)]
struct Batch([Operation; 5]);
#[derive(Debug, Clone, Parser)]
struct Opt {
/// The number of fuzzer to run in parallel.
#[clap(long)]
par: Option<NonZeroUsize>,
// We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line
/// The path in which the databases will be created.
/// Using a ramdisk is recommended.
///
/// Linux:
///
/// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
///
/// sudo umount ramdisk # to remove it
///
/// MacOS:
///
/// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304 # create it
///
/// hdiutil detach /dev/:the_disk
#[clap(long)]
path: Option<PathBuf>,
}
fn main() {
let opt = Opt::parse();
let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false)));
let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
let mut handles = Vec::with_capacity(par);
for _ in 0..par {
let opt = opt.clone();
let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new();
options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(),
None => TempDir::new().unwrap(),
};
let index = Index::new(options, tempdir.path()).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
std::thread::scope(|s| {
loop {
if stop.load(Ordering::Relaxed) {
return;
}
let v: Vec<u8> =
std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
let mut data = Unstructured::new(&v);
let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
// will be used to display the error once a thread crashes
let dbg_input = format!("{:#?}", batches);
let handle = s.spawn(|| {
let mut wtxn = index.write_txn().unwrap();
for batch in batches {
let mut builder = IndexDocuments::new(
&mut wtxn,
&index,
&indexer_config,
index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
for op in batch.0 {
match op {
Operation::AddDoc(doc) => {
let documents =
milli::documents::objects_from_json_value(doc.to_d());
let documents =
milli::documents::documents_batch_reader_from_objects(
documents,
);
let (b, _added) = builder.add_documents(documents).unwrap();
builder = b;
}
Operation::DeleteDoc(id) => {
let (b, _removed) =
builder.remove_documents(vec![id.to_s()]).unwrap();
builder = b;
}
}
}
builder.execute().unwrap();
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort().unwrap();
});
if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed);
err.expect(&dbg_input);
}
}
});
});
handles.push(handle);
}
std::thread::spawn(|| {
let mut last_value = 0;
let start = std::time::Instant::now();
loop {
let total = progression.load(Ordering::Relaxed);
let elapsed = start.elapsed().as_secs();
if elapsed > 3600 {
// after 1 hour, stop the fuzzer, success
std::process::exit(0);
}
println!(
"Has been running for {:?} seconds. Tested {} new values for a total of {}.",
elapsed,
total - last_value,
total
);
last_value = total;
std::thread::sleep(Duration::from_secs(1));
}
});
for handle in handles {
handle.join().unwrap();
}
}

View File

@ -1,46 +0,0 @@
use arbitrary::Arbitrary;
use serde_json::{json, Value};
#[derive(Debug, Arbitrary)]
pub enum Document {
One,
Two,
Three,
Four,
Five,
Six,
}
impl Document {
pub fn to_d(&self) -> Value {
match self {
Document::One => json!({ "id": 0, "doggo": "bernese" }),
Document::Two => json!({ "id": 0, "doggo": "golden" }),
Document::Three => json!({ "id": 0, "catto": "jorts" }),
Document::Four => json!({ "id": 1, "doggo": "bernese" }),
Document::Five => json!({ "id": 1, "doggo": "golden" }),
Document::Six => json!({ "id": 1, "catto": "jorts" }),
}
}
}
#[derive(Debug, Arbitrary)]
pub enum DocId {
Zero,
One,
}
impl DocId {
pub fn to_s(&self) -> String {
match self {
DocId::Zero => "0".to_string(),
DocId::One => "1".to_string(),
}
}
}
#[derive(Debug, Arbitrary)]
pub enum Operation {
AddDoc(Document),
DeleteDoc(DocId),
}

File diff suppressed because it is too large Load Diff

View File

@ -11,30 +11,31 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.70"
anyhow = "1.0.64"
bincode = "1.3.3"
csv = "1.2.1"
derive_builder = "0.12.0"
cluster = { path = "../cluster" }
crossbeam = "0.8.2"
csv = "1.1.6"
derive_builder = "0.11.2"
dump = { path = "../dump" }
enum-iterator = "1.4.0"
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
log = "0.4.17"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = "0.16.0"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.2"
insta = { version = "1.29.0", features = ["json", "redactions"] }
insta = { version = "1.19.1", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@ -25,7 +25,6 @@ enum AutobatchKind {
primary_key: Option<String>,
},
DocumentDeletion,
DocumentDeletionByFilter,
DocumentClear,
Settings {
allow_index_creation: bool,
@ -65,9 +64,6 @@ impl From<KindWithContent> for AutobatchKind {
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletionByFilter
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
@ -101,9 +97,6 @@ pub enum BatchKind {
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
DocumentDeletionByFilter {
id: TaskId,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
@ -160,7 +153,7 @@ impl BatchKind {
impl BatchKind {
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// To ease the writting of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
// TODO use an AutoBatchKind as input
pub fn new(
@ -202,9 +195,6 @@ impl BatchKind {
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
K::DocumentDeletionByFilter => {
(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
}
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
@ -214,7 +204,7 @@ impl BatchKind {
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// To ease the writting of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
#[rustfmt::skip]
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
@ -222,7 +212,7 @@ impl BatchKind {
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
@ -518,8 +508,7 @@ impl BatchKind {
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentDeletionByFilter { .. },
| BatchKind::IndexSwap { .. },
_,
) => {
unreachable!()

View File

@ -22,17 +22,16 @@ use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use crossbeam::utils::Backoff;
use dump::{DumpWriter, IndexMetadata};
use log::{debug, error, info};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -43,14 +42,14 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Cluster, Error, IndexScheduler, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
/// A batch contains the set of tasks that it represents (accessible through
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
/// be processed.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub(crate) enum Batch {
TaskCancelation {
/// The task cancelation itself.
@ -67,10 +66,6 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@ -91,14 +86,14 @@ pub(crate) enum Batch {
},
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub(crate) enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub(crate) enum IndexOperation {
DocumentOperation {
index_uid: String,
@ -155,7 +150,6 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@ -194,8 +188,7 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
@ -235,18 +228,6 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentDeletionByFilter { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
@ -471,8 +452,6 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
puffin::profile_function!();
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@ -577,9 +556,6 @@ impl IndexScheduler {
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(format!("{:?}", batch));
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@ -611,6 +587,12 @@ impl IndexScheduler {
_ => unreachable!(),
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
// We must only remove the content files if the transaction is successfully committed
// and if errors occurs when we are deleting files we must do our best to delete
// everything. We do not return the encountered errors when deleting the content
@ -654,6 +636,13 @@ impl IndexScheduler {
}
_ => unreachable!(),
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
wtxn.commit()?;
Ok(vec![task])
}
@ -700,6 +689,9 @@ impl IndexScheduler {
}
// 3. Snapshot every indexes
// TODO we are opening all of the indexes it can be too much we should unload all
// of the indexes we are trying to open. It would be even better to only unload
// the ones that were opened by us. Or maybe use a LRU in the index mapper.
for result in self.index_mapper.index_mapping.iter(&rtxn)? {
let (name, uuid) = result?;
let index = self.index_mapper.index(&rtxn, name)?;
@ -736,14 +728,6 @@ impl IndexScheduler {
// 5.3 Change the permission to make the snapshot readonly
let mut permissions = file.metadata()?.permissions();
permissions.set_readonly(true);
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
#[allow(clippy::non_octal_unix_permissions)]
// rwxrwxrwx
permissions.set_mode(0b100100100);
}
file.set_permissions(permissions)?;
for task in &mut tasks {
@ -753,100 +737,9 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::Dump(mut task) => {
// TODO: It would be better to use the started_at from the task instead of generating a new one
let started_at = OffsetDateTime::now_utc();
let (keys, instance_uid) =
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
(keys, instance_uid)
} else {
unreachable!();
};
let dump = dump::DumpWriter::new(*instance_uid)?;
// 1. dump the keys
let mut dump_keys = dump.create_keys()?;
for key in keys {
dump_keys.push_key(key)?;
}
dump_keys.flush()?;
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if t.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
t.status = Status::Succeeded;
t.started_at = Some(started_at);
t.finished_at = Some(finished_at);
}
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (mut cursor, documents_batch_index) =
reader.into_cursor_and_fields_index();
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file.push_document(&obkv_to_object(
&doc,
&documents_batch_index,
)?)?;
}
dump_content_file.flush()?;
}
}
}
dump_tasks.flush()?;
// 3. Dump the indexes
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(index, &rtxn)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
// 4. Dump experimental feature settings
let features = self.features()?.runtime_features();
dump.create_experimental_features(features)?;
let dump = self.create_dump(&task, &started_at)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
@ -862,85 +755,29 @@ impl IndexScheduler {
Ok(vec![task])
}
Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string();
let index_uid = op.index_uid();
let index = if must_create_index {
// create the index if it doesn't already exist
let wtxn = self.env.write_txn()?;
self.index_mapper.create_index(wtxn, &index_uid, None)?
self.index_mapper.create_index(wtxn, index_uid, None)?
} else {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, &index_uid)?
self.index_mapper.index(&rtxn, index_uid)?
};
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
index_wtxn.commit()?;
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let res = || -> Result<()> {
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
wtxn.commit()?;
Ok(())
}();
match res {
Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@ -969,31 +806,9 @@ impl IndexScheduler {
)?;
index_wtxn.commit()?;
}
// drop rtxn before starting a new wtxn on the same db
rtxn.commit()?;
task.status = Status::Succeeded;
task.details = Some(Details::IndexInfo { primary_key });
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let res = || -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
wtxn.commit()?;
Ok(())
}();
match res {
Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(vec![task])
}
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
@ -1007,7 +822,7 @@ impl IndexScheduler {
}()
.unwrap_or_default();
// The write transaction is directly owned and committed inside.
// The write transaction is directly owned and commited inside.
match self.index_mapper.delete_index(wtxn, &index_uid) {
Ok(()) => (),
Err(Error::IndexNotFound(_)) if index_has_been_created => (),
@ -1057,6 +872,13 @@ impl IndexScheduler {
for swap in swaps {
self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
wtxn.commit()?;
task.status = Status::Succeeded;
Ok(vec![task])
@ -1064,6 +886,99 @@ impl IndexScheduler {
}
}
pub(crate) fn create_dump(
&self,
task: &Task,
started_at: &OffsetDateTime,
) -> Result<DumpWriter> {
let (keys, instance_uid) =
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
(keys, instance_uid)
} else {
unreachable!();
};
let dump = dump::DumpWriter::new(*instance_uid)?;
// 1. dump the keys
let mut dump_keys = dump.create_keys()?;
for key in keys {
dump_keys.push_key(key)?;
}
dump_keys.flush()?;
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if t.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
t.status = Status::Succeeded;
t.started_at = Some(*started_at);
t.finished_at = Some(finished_at);
}
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
}
}
}
dump_tasks.flush()?;
// 3. Dump the indexes
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(index, &rtxn)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
Ok(dump)
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
// 1. Verify that both lhs and rhs are existing indexes
@ -1116,8 +1031,6 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
puffin::profile_function!();
match operation {
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
@ -1496,26 +1409,274 @@ impl IndexScheduler {
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let mut wtxn = index.write_txn()?;
pub(crate) fn get_batch_from_cluster_batch(
&self,
batch: cluster::batch::Batch,
) -> Result<Batch> {
use cluster::batch::Batch as CBatch;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
let mut rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
for id in batch.ids() {
let backoff = Backoff::new();
let id = BEU32::new(id);
loop {
if self.all_tasks.get(&rtxn, &id)?.is_some() {
info!("Found the task_id");
break;
}
info!("The task is not present in the task queue, we wait");
// we need to drop the txn to make a write visible
drop(rtxn);
backoff.spin();
rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
delete_operation.delete_documents(&candidates);
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
} else {
0
})
}
Ok(match batch {
CBatch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
Batch::TaskCancelation {
task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
previous_started_at,
previous_processing_tasks,
}
}
CBatch::TaskDeletion(task) => {
Batch::TaskDeletion(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
}
CBatch::SnapshotCreation(tasks) => {
Batch::SnapshotCreation(self.get_existing_tasks(&rtxn, tasks)?)
}
CBatch::Dump(task) => {
Batch::Dump(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
}
CBatch::IndexOperation { op, must_create_index } => Batch::IndexOperation {
op: self.get_index_op_from_cluster_index_op(&rtxn, op)?,
must_create_index,
},
CBatch::IndexCreation { index_uid, primary_key, task } => Batch::IndexCreation {
index_uid,
primary_key,
task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
},
CBatch::IndexUpdate { index_uid, primary_key, task } => Batch::IndexUpdate {
index_uid,
primary_key,
task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
},
CBatch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
Batch::IndexDeletion {
index_uid,
tasks: self.get_existing_tasks(&rtxn, tasks)?,
index_has_been_created,
}
}
CBatch::IndexSwap { task } => {
Batch::IndexSwap { task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone() }
}
})
}
pub(crate) fn get_index_op_from_cluster_index_op(
&self,
rtxn: &RoTxn,
op: cluster::batch::IndexOperation,
) -> Result<IndexOperation> {
use cluster::batch::IndexOperation as COp;
Ok(match op {
COp::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
tasks,
} => IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
tasks: self.get_existing_tasks(rtxn, tasks)?,
},
COp::DocumentDeletion { index_uid, documents, tasks } => {
IndexOperation::DocumentDeletion {
index_uid,
documents,
tasks: self.get_existing_tasks(rtxn, tasks)?,
}
}
COp::DocumentClear { index_uid, tasks } => IndexOperation::DocumentClear {
index_uid,
tasks: self.get_existing_tasks(rtxn, tasks)?,
},
COp::Settings { index_uid, settings, tasks } => IndexOperation::Settings {
index_uid,
settings,
tasks: self.get_existing_tasks(rtxn, tasks)?,
},
COp::DocumentClearAndSetting { index_uid, cleared_tasks, settings, settings_tasks } => {
IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks: self.get_existing_tasks(rtxn, cleared_tasks)?,
settings,
settings_tasks: self.get_existing_tasks(rtxn, settings_tasks)?,
}
}
COp::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
document_import_tasks,
settings,
settings_tasks,
} => IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
document_import_tasks: self.get_existing_tasks(rtxn, document_import_tasks)?,
settings,
settings_tasks: self.get_existing_tasks(rtxn, settings_tasks)?,
},
})
}
}
impl From<Batch> for cluster::batch::Batch {
fn from(batch: Batch) -> Self {
use cluster::batch::Batch as CBatch;
match batch {
Batch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
CBatch::TaskCancelation {
task: task.uid,
previous_started_at,
previous_processing_tasks,
}
}
Batch::TaskDeletion(task) => CBatch::TaskDeletion(task.uid),
Batch::SnapshotCreation(task) => {
CBatch::SnapshotCreation(task.into_iter().map(|task| task.uid).collect())
}
Batch::Dump(task) => CBatch::Dump(task.uid),
Batch::IndexOperation { op, must_create_index } => {
CBatch::IndexOperation { op: op.into(), must_create_index }
}
Batch::IndexCreation { index_uid, primary_key, task } => {
CBatch::IndexCreation { index_uid, primary_key, task: task.uid }
}
Batch::IndexUpdate { index_uid, primary_key, task } => {
CBatch::IndexUpdate { index_uid, primary_key, task: task.uid }
}
Batch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
CBatch::IndexDeletion {
index_uid,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
index_has_been_created,
}
}
Batch::IndexSwap { task } => CBatch::IndexSwap { task: task.uid },
}
}
}
impl From<IndexOperation> for cluster::batch::IndexOperation {
fn from(op: IndexOperation) -> Self {
use cluster::batch::IndexOperation as COp;
match op {
IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
tasks,
} => COp::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::DocumentDeletion { index_uid, documents, tasks } => {
COp::DocumentDeletion {
index_uid,
documents,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
}
}
IndexOperation::DocumentClear { index_uid, tasks } => COp::DocumentClear {
index_uid,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::Settings { index_uid, settings, tasks } => COp::Settings {
index_uid,
settings,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks,
settings,
settings_tasks,
} => COp::DocumentClearAndSetting {
index_uid,
cleared_tasks: cleared_tasks.into_iter().map(|task| task.uid).collect(),
settings,
settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
document_import_tasks,
settings,
settings_tasks,
} => COp::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
document_import_tasks: document_import_tasks
.into_iter()
.map(|task| task.uid)
.collect(),
settings,
settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
},
}
}
}
impl From<DocumentOperation> for cluster::batch::DocumentOperation {
fn from(op: DocumentOperation) -> Self {
use cluster::batch::DocumentOperation as COp;
match op {
DocumentOperation::Add(uuid) => COp::Add(uuid),
DocumentOperation::Delete(docs) => COp::Delete(docs),
}
}
}
impl From<cluster::batch::DocumentOperation> for DocumentOperation {
fn from(op: cluster::batch::DocumentOperation) -> Self {
use cluster::batch::DocumentOperation as COp;
match op {
COp::Add(uuid) => DocumentOperation::Add(uuid),
COp::Delete(docs) => DocumentOperation::Delete(docs),
}
}
}

View File

@ -46,8 +46,6 @@ impl From<DateField> for Code {
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@ -63,8 +61,6 @@ pub enum Error {
SwapDuplicateIndexesFound(Vec<String>),
#[error("Index `{0}` not found.")]
SwapIndexNotFound(String),
#[error("Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.")]
NoSpaceLeftInTaskQueue,
#[error(
"Indexes {} not found.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@ -123,8 +119,6 @@ pub enum Error {
IoError(#[from] std::io::Error),
#[error(transparent)]
Persist(#[from] tempfile::PersistError),
#[error(transparent)]
FeatureNotEnabled(#[from] FeatureNotEnabledError),
#[error(transparent)]
Anyhow(#[from] anyhow::Error),
@ -138,70 +132,11 @@ pub enum Error {
TaskDatabaseUpdate(Box<Self>),
#[error(transparent)]
HeedTransaction(heed::Error),
#[cfg(test)]
#[error("Planned failure for tests.")]
PlannedFailure,
}
#[derive(Debug, thiserror::Error)]
#[error(
"{disabled_action} requires enabling the `{feature}` experimental feature. See {issue_link}"
)]
pub struct FeatureNotEnabledError {
pub disabled_action: &'static str,
pub feature: &'static str,
pub issue_link: &'static str,
}
impl Error {
pub fn is_recoverable(&self) -> bool {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
| Error::SwapIndexNotFound(_)
| Error::NoSpaceLeftInTaskQueue
| Error::SwapIndexesNotFound(_)
| Error::CorruptedDump
| Error::InvalidTaskDate { .. }
| Error::InvalidTaskUids { .. }
| Error::InvalidTaskStatuses { .. }
| Error::InvalidTaskTypes { .. }
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
| Error::FeatureNotEnabled(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
Error::PlannedFailure => false,
}
}
pub fn with_custom_error_code(self, code: Code) -> Self {
Self::WithCustomErrorCode(code, Box::new(self))
}
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@ -217,8 +152,6 @@ impl ErrorCode for Error {
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal,
@ -227,7 +160,6 @@ impl ErrorCode for Error {
Error::FileStore(e) => e.error_code(),
Error::IoError(e) => e.error_code(),
Error::Persist(e) => e.error_code(),
Error::FeatureNotEnabled(_) => Code::FeatureNotEnabled,
// Irrecoverable errors
Error::Anyhow(_) => Code::Internal,
@ -235,9 +167,6 @@ impl ErrorCode for Error {
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}
}
}

View File

@ -1,98 +0,0 @@
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
instance: InstanceTogglableFeatures,
}
#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
}
impl RoFeatures {
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(txn)?;
Ok(Self { runtime, instance: data.instance })
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
self.runtime
}
pub fn check_score_details(&self) -> Result<()> {
if self.runtime.score_details {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Computing score details",
feature: "score details",
issue_link: "https://github.com/meilisearch/product/discussions/674",
}
.into())
}
}
pub fn check_metrics(&self) -> Result<()> {
if self.instance.metrics {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
}
.into())
}
}
pub fn check_vector(&self) -> Result<()> {
if self.runtime.vector_store {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Passing `vector` as a query parameter",
feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
Ok(Self { runtime: runtime_features, instance: instance_features })
}
pub fn put_runtime_features(
&self,
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
Ok(())
}
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
}
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(txn, self)
}
}

View File

@ -5,7 +5,6 @@ use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
@ -54,7 +53,6 @@ pub struct IndexMap {
pub struct ClosingIndex {
uuid: Uuid,
closing_event: EnvClosingEvent,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@ -70,7 +68,6 @@ impl ClosingIndex {
pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
uuid: self.uuid,
enable_mdb_writemap: self.enable_mdb_writemap,
map_size: self.map_size,
generation: self.generation,
})
@ -79,7 +76,6 @@ impl ClosingIndex {
pub struct ReopenableIndex {
uuid: Uuid,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@ -107,7 +103,7 @@ impl ReopenableIndex {
return Ok(());
}
map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
map.create(&self.uuid, path, None, self.map_size)?;
}
Ok(())
}
@ -174,17 +170,16 @@ impl IndexMap {
uuid: &Uuid,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable");
}
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
let index = create_or_open_index(path, date, map_size)?;
match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0);
self.close(evicted_uuid, evicted_index, 0);
}
InsertionOutcome::Replaced(_) => {
panic!("Attempt to open an index that was already opened")
@ -217,32 +212,17 @@ impl IndexMap {
/// | Closing | Closing |
/// | Available | Closing |
///
pub fn close_for_resize(
&mut self,
uuid: &Uuid,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let Some(index) = self.available.remove(uuid) else {
return;
};
self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
let Some(index) = self.available.remove(uuid) else { return; };
self.close(*uuid, index, map_size_growth);
}
fn close(
&mut self,
uuid: Uuid,
index: Index,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable.insert(
uuid,
Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }),
);
self.unavailable
.insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
}
/// Attempts to delete and index.
@ -302,15 +282,11 @@ impl IndexMap {
fn create_or_open_index(
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?)

View File

@ -4,11 +4,10 @@ use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
@ -20,7 +19,6 @@ use crate::{Error, Result};
mod index_map;
const INDEX_MAPPING: &str = "index-mapping";
const INDEX_STATS: &str = "index-stats";
/// Structure managing meilisearch's indexes.
///
@ -54,11 +52,6 @@ pub struct IndexMapper {
/// Map an index name with an index uuid currently available on disk.
pub(crate) index_mapping: Database<Str, UuidCodec>,
/// Map an index UUID with the cached stats associated to the index.
///
/// Using an UUID forces to use the index_mapping table to recover the index behind a name, ensuring
/// consistency wrt index swapping.
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
@ -66,8 +59,6 @@ pub struct IndexMapper {
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>,
}
@ -85,48 +76,6 @@ pub enum IndexStatus {
Available(Index),
}
/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
pub created_at: OffsetDateTime,
/// Date of the last update of the index.
pub updated_at: OffsetDateTime,
}
impl IndexStats {
/// Compute the stats of an index
///
/// # Parameters
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
field_distribution: index.field_distribution(rtxn)?,
created_at: index.created_at(rtxn)?,
updated_at: index.updated_at(rtxn)?,
})
}
}
impl IndexMapper {
pub fn new(
env: &Env,
@ -134,22 +83,14 @@ impl IndexMapper {
index_base_map_size: usize,
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
wtxn.commit()?;
Ok(Self {
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
index_mapping,
index_stats,
index_mapping: env.create_database(Some(INDEX_MAPPING))?,
base_path,
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
})
}
@ -180,7 +121,6 @@ impl IndexMapper {
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
@ -200,9 +140,6 @@ impl IndexMapper {
.get(&wtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// Not an error if the index had no stats in cache.
self.index_stats.delete(&mut wtxn, &uuid)?;
// Once we retrieved the UUID of the index we remove it from the mapping table.
assert!(self.index_mapping.delete(&mut wtxn, name)?);
@ -292,11 +229,7 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(
&uuid,
self.enable_mdb_writemap,
self.index_growth_amount,
);
self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);
Ok(())
}
@ -361,7 +294,6 @@ impl IndexMapper {
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
}
@ -428,45 +360,6 @@ impl IndexMapper {
Ok(())
}
/// The stats of an index.
///
/// If available in the cache, they are directly returned.
/// Otherwise, the `Index` is opened to compute the stats on the fly (the result is not cached).
/// The stats for an index are cached after each `Index` update.
pub fn stats_of(&self, rtxn: &RoTxn, index_uid: &str) -> Result<IndexStats> {
let uuid = self
.index_mapping
.get(rtxn, index_uid)?
.ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?;
match self.index_stats.get(rtxn, &uuid)? {
Some(stats) => Ok(stats),
None => {
let index = self.index(rtxn, index_uid)?;
let index_rtxn = index.read_txn()?;
IndexStats::new(&index, &index_rtxn)
}
}
}
/// Stores the new stats for an index.
///
/// Expected usage is to compute the stats the index using `IndexStats::new`, the pass it to this function.
pub fn store_stats_of(
&self,
wtxn: &mut RwTxn,
index_uid: &str,
stats: &IndexStats,
) -> Result<()> {
let uuid = self
.index_mapping
.get(wtxn, index_uid)?
.ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?;
self.index_stats.put(wtxn, &uuid, stats)?;
Ok(())
}
pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}

View File

@ -28,13 +28,13 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
started_at,
finished_at,
index_mapper,
features: _,
max_number_of_tasks: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
auth_path: _,
version_file_path: _,
cluster: _,
consistency_level: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
@ -185,9 +185,6 @@ fn snapshot_details(d: &Details) -> String {
provided_ids: received_document_ids,
deleted_documents,
} => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!(
"{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}"
),
Details::ClearAll { deleted_documents } => {
format!("{{ deleted_documents: {deleted_documents:?} }}")
},
@ -259,16 +256,6 @@ pub fn snapshot_canceled_by(
snap
}
pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let mut s = String::new();
let names = mapper.index_names(rtxn).unwrap();
for name in names {
let stats = mapper.stats_of(rtxn, &name).unwrap();
s.push_str(&format!(
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
stats.number_of_documents, stats.field_distribution
));
}
s
format!("{names:?}")
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
---
source: index-scheduler/src/lib.rs
assertion_line: 1755
---
### Autobatching Enabled = true
### Processing Tasks:
@ -22,7 +23,7 @@ canceled [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
1 [0,]

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -25,9 +25,7 @@ catto [0,]
wolfo [2,]
----------------------------------------------------------------------
### Index Mapper:
beavero: { number_of_documents: 0, field_distribution: {} }
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["beavero", "catto"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -1,5 +1,6 @@
---
source: index-scheduler/src/lib.rs
assertion_line: 1859
---
### Autobatching Enabled = true
### Processing Tasks:
@ -26,9 +27,7 @@ catto [0,]
wolfo [2,]
----------------------------------------------------------------------
### Index Mapper:
beavero: { number_of_documents: 0, field_distribution: {} }
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["beavero", "catto"]
----------------------------------------------------------------------
### Canceled By:
3 [1,2,]

View File

@ -23,8 +23,7 @@ catto [0,]
wolfo [2,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["catto"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -25,8 +25,7 @@ catto [0,]
wolfo [2,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["catto"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -20,8 +20,7 @@ enqueued [0,1,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 0, field_distribution: {} }
["catto"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -1,5 +1,6 @@
---
source: index-scheduler/src/lib.rs
assertion_line: 1818
---
### Autobatching Enabled = true
### Processing Tasks:
@ -22,8 +23,7 @@ canceled [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 0, field_distribution: {} }
["catto"]
----------------------------------------------------------------------
### Canceled By:
1 [0,]

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -21,8 +21,7 @@ succeeded [0,1,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["catto"]
----------------------------------------------------------------------
### Canceled By:
1 []

View File

@ -19,8 +19,7 @@ succeeded [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 1, field_distribution: {"id": 1} }
["catto"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -27,10 +27,7 @@ doggos [0,3,]
girafos [2,5,]
----------------------------------------------------------------------
### Index Mapper:
cattos: { number_of_documents: 0, field_distribution: {} }
doggos: { number_of_documents: 0, field_distribution: {} }
girafos: { number_of_documents: 0, field_distribution: {} }
["cattos", "doggos", "girafos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -19,8 +19,7 @@ succeeded [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -21,8 +21,7 @@ succeeded [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -23,8 +23,7 @@ succeeded [0,]
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -23,7 +23,7 @@ succeeded [0,1,2,]
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -22,7 +22,7 @@ enqueued [0,1,2,]
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -21,7 +21,7 @@ succeeded [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -21,7 +21,7 @@ failed [0,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -22,8 +22,7 @@ failed [0,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -20,7 +20,7 @@ enqueued [0,1,]
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
@ -19,7 +19,7 @@ failed [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued []
@ -19,7 +19,7 @@ failed [0,]
catto [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,8 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,8 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -18,7 +18,7 @@ enqueued [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:

View File

@ -19,8 +19,7 @@ succeeded [0,]
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
["doggos"]
----------------------------------------------------------------------
### Canceled By:

Some files were not shown because too many files have changed in this diff Show More