Compare commits


3 Commits

Author          SHA1        Message                                     Date
Louis Dureuil   2118dcac7a  make CI newer                               2023-04-18 19:42:52 +02:00
Louis Dureuil   6939d3d061  More error logs                             2023-04-18 19:36:58 +02:00
Louis Dureuil   5d2ca496cb  updated tempfile and put some hopeful log   2023-04-18 19:18:15 +02:00
91 changed files with 2473 additions and 4172 deletions


@@ -23,8 +23,8 @@ A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Meilisearch version:** [e.g. v0.20.0]
**MeiliSearch version:** [e.g. v0.20.0]
**Additional context**
Additional information that may be relevant to the issue.
[e.g. architecture, device, OS, browser]
[e.g. architecture, device, OS, browser]


@@ -6,5 +6,5 @@ contact_links:
url: https://github.com/meilisearch/documentation/issues/new
about: For documentation issues, open an issue or a PR in the documentation repository
- name: Support questions & other
url: https://github.com/meilisearch/meilisearch/discussions/new
url: https://github.com/meilisearch/MeiliSearch/discussions/new
about: For any other question, open a discussion in this repository


@@ -74,7 +74,7 @@ semverLT() {
# Returns the tag of the latest stable release (in terms of semver and not of release date)
get_latest() {
temp_file='temp_file' # temp_file needed because the grep would start before the download is over
curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file"
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file"
releases=$(cat "$temp_file" | \
grep -E "tag_name|draft|prerelease" \
| tr -d ',"' | cut -d ':' -f2 | tr -d ' ')
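The comment above explains why the script lands the API response in a temp file instead of piping `curl` straight into `grep`: in a pipeline both commands run at once, so `grep` starts consuming before the download finishes and a failed `curl` is easy to miss. A minimal sketch of the two shapes, using the same endpoint as the hunk:

```bash
# Piped form: grep starts before curl is done, and curl's exit status is
# silently discarded unless `set -o pipefail` is in effect.
curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' \
    | grep -E 'tag_name|draft|prerelease'

# Download-then-parse form, as in the script: the response is complete on
# disk before grep runs, and each step can be error-checked on its own.
temp_file='temp_file'
curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file" || exit 1
grep -E 'tag_name|draft|prerelease' "$temp_file"
rm -f "$temp_file"
```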


@@ -1,4 +1,4 @@
# GitHub Actions Workflow for Meilisearch
# GitHub Actions Workflow for MeiliSearch
> **Note:**


@@ -38,69 +38,28 @@ jobs:
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-aarch64:
name: Publish to GitHub
runs-on: ${{ matrix.os }}
continue-on-error: false
strategy:
fail-fast: false
matrix:
include:
- build: aarch64
os: ubuntu-18.04
target: aarch64-unknown-linux-gnu
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64
publish-armv8:
name: Publish for ARMv8
runs-on: ubuntu-18.04
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
- uses: actions/checkout@v2
- uses: uraimo/run-on-arch-action@v2.1.1
id: runcmd
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
# JEMALLOC_SYS_WITH_LG_PAGE: 16
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
run: |
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
echo RUSTFLAGS="-Clink-arg=-fuse-ld=gold" >> $GITHUB_ENV
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}
- name: List target output files
run: ls -lR ./target
arch: aarch64 # aka ARMv8
distro: ubuntu18.04
env: |
JEMALLOC_SYS_WITH_LG_PAGE: 16
run: |
apt update
apt install -y curl gcc make
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
source $HOME/.cargo/env
cargo build --release --locked
- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
file: target/release/meilisearch
asset_name: meilisearch-linux-armv8
tag: ${{ github.ref }}
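Both versions of this job set `JEMALLOC_SYS_WITH_LG_PAGE=16` for the aarch64 build. The value is a base-2 logarithm: jemalloc is told to assume pages of 2^16 bytes (64 KiB), the largest page size in common use on arm64 Linux, so the resulting binary also runs on kernels configured with smaller pages. A quick sanity check of the arithmetic:

```bash
# lg(page) = 16  =>  assumed page size is 2^16 bytes
echo $((1 << 16))    # prints 65536, i.e. 64 KiB
getconf PAGESIZE     # page size of the current host, for comparison
```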


@@ -0,0 +1,76 @@
name: Publish aarch64 binary
on:
release:
types: [published]
env:
CARGO_TERM_COLOR: always
jobs:
publish-aarch64:
name: Publish to Github
runs-on: ${{ matrix.os }}
continue-on-error: false
strategy:
fail-fast: false
matrix:
include:
- build: aarch64
os: ubuntu-18.04
target: aarch64-unknown-linux-gnu
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
# JEMALLOC_SYS_WITH_LG_PAGE: 16
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
run: |
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
echo RUSTFLAGS="-Clink-arg=-fuse-ld=gold" >> $GITHUB_ENV
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
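The `Configure target aarch64 GNU` step builds its configuration by appending lines with `echo`. Under the stated condition it is equivalent to the following sketch (assuming `~/.cargo/config` starts without an aarch64 section), which makes the resulting linker override easier to read:

```bash
# One-shot equivalent of the step's echo appends (a sketch, not the workflow itself)
cat >> ~/.cargo/config <<'EOF'
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"
EOF

# These land in the job environment for the later cargo invocation
{
    echo 'JEMALLOC_SYS_WITH_LG_PAGE=16'
    echo 'RUSTFLAGS=-Clink-arg=-fuse-ld=gold'
} >> "$GITHUB_ENV"
```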


@@ -0,0 +1,105 @@
name: Publish images to Docker Hub
on:
push:
# Will run for every tag pushed except `latest`
# When the `latest` git tag is created with this [CI](../latest-git-tag.yml)
# we don't need to create a Docker `latest` image again.
# The `latest` Docker image push is already done in this CI when releasing a stable version of Meilisearch.
tags-ignore:
- latest
# Both `schedule` and `workflow_dispatch` build the nightly tag
schedule:
- cron: '0 23 * * *' # Every day at 11:00pm
workflow_dispatch:
jobs:
docker:
runs-on: docker
steps:
- uses: actions/checkout@v3
# If we are running a cron or manual job ('schedule' or 'workflow_dispatch' event), it means we are publishing the `nightly` tag, so not considered stable.
# If we have pushed a tag, and the tag has the v<number>.<number>.<number> format, it means we are publishing an official release, so considered stable.
# In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag):
# - a `vX.Y` (without patch version) Docker tag
# - a `latest` Docker tag
# For any other tag pushed, this is not considered stable.
- name: Define if stable and latest release
id: check-tag-format
env:
# To avoid request limit with the .github/scripts/is-latest-release.sh script
GITHUB_PATH: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
escaped_tag=$(printf "%q" ${{ github.ref_name }})
echo "latest=false" >> $GITHUB_OUTPUT
if [[ ${{ github.event_name }} != 'push' ]]; then
echo "stable=false" >> $GITHUB_OUTPUT
elif [[ $escaped_tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "stable=true" >> $GITHUB_OUTPUT
echo "latest=$(sh .github/scripts/is-latest-release.sh)" >> $GITHUB_OUTPUT
else
echo "stable=false" >> $GITHUB_OUTPUT
fi
# Check only the validity of the tag for stable releases (not for pre-releases or other tags)
- name: Check release validity
if: steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
- name: Set build-args for Docker buildx
id: build-metadata
run: |
# Extract commit date
commit_date=$(git show -s --format=%cd --date=iso-strict ${{ github.sha }})
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v4
with:
images: getmeili/meilisearch
# Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false
tags: |
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v4
with:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud
event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
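Everything in the `check-tag-format` step hinges on one regex: only a pushed tag of the exact shape `vX.Y.Z` is treated as stable, and only a stable tag can additionally become `latest`. A standalone sketch of that classification, with made-up tag names:

```bash
# Hypothetical tags run through the workflow's stability check
for tag in v1.1.0 v1.1.0-rc.1 nightly latest prototype-foo-0; do
    if [[ $tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        echo "$tag -> stable (eligible for the vX.Y and latest Docker tags)"
    else
        echo "$tag -> not stable"
    fi
done
```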


@@ -1,30 +0,0 @@
---
on:
release:
types: [released]
name: Publish latest image to Docker Hub
jobs:
docker-latest:
runs-on: docker
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
push: true
platforms: linux/amd64,linux/arm64
tags: getmeili/meilisearch:latest


@@ -1,39 +0,0 @@
---
on:
push:
tags:
- '*'
name: Publish tagged image to Docker Hub
jobs:
docker-tag:
runs-on: docker
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v3
with:
images: getmeili/meilisearch
flavor: latest=false
tags: type=ref,event=tag
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}


@@ -1,10 +1,7 @@
# Contributing
First, thank you for contributing to Meilisearch! The goal of this document is to provide everything you need to start contributing to Meilisearch.
First, thank you for contributing to MeiliSearch! The goal of this document is to provide everything you need to start contributing to MeiliSearch.
Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)...
## Table of Contents
- [Assumptions](#assumptions)
- [How to Contribute](#how-to-contribute)
- [Development Workflow](#development-workflow)
@@ -13,8 +10,8 @@ Remember that there are many ways to contribute other than writing code: writing
## Assumptions
1. **You're familiar with [Github](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
2. **You've read the MeiliSearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [MeiliSearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
Please use this for help.**
## How to Contribute
@@ -22,21 +19,21 @@ Remember that there are many ways to contribute other than writing code: writing
1. Ensure your change has an issue! Find an
[existing issue](https://github.com/meilisearch/meilisearch/issues/) or [open a new issue](https://github.com/meilisearch/meilisearch/issues/new).
* This is where you can get a feel if the change will be accepted or not.
2. Once approved, [fork the Meilisearch repository](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) in your own Github account.
2. Once approved, [fork the MeiliSearch repository](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) in your own Github account.
3. [Create a new Git branch](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository)
4. Review the [Development Workflow](#development-workflow) section that describes the steps to maintain the repository.
5. Make your changes on your branch.
6. [Submit the branch as a Pull Request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) pointing to the `main` branch of the Meilisearch repository. A maintainer should comment and/or review your Pull Request within a few days. Although depending on the circumstances, it may take longer.
6. [Submit the branch as a Pull Request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) pointing to the `main` branch of the MeiliSearch repository. A maintainer should comment and/or review your Pull Request within a few days. Although depending on the circumstances, it may take longer.
## Development Workflow
### Setup and run Meilisearch
### Setup and run MeiliSearch
```bash
cargo run --release
```
We recommend using the `--release` flag to test the full performance of Meilisearch.
We recommend using the `--release` flag to test the full performance of MeiliSearch.
### Test

Cargo.lock (generated): 1486 changed lines. File diff suppressed because it is too large.


@@ -5,3 +5,5 @@ members = [
"meilisearch-lib",
"meilisearch-auth",
]
resolver = "2"


@@ -21,10 +21,7 @@ ENV RUSTFLAGS="-C target-feature=-crt-static"
# Create dummy main.rs files for each workspace member to be able to compile all the dependencies
RUN find . -type d -name "meilisearch-*" | xargs -I{} sh -c 'mkdir {}/src; echo "fn main() { }" > {}/src/main.rs;'
# Use `cargo build` instead of `cargo vendor` because we need to not only download but compile dependencies too
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
$HOME/.cargo/bin/cargo build --release
RUN $HOME/.cargo/bin/cargo build --release
# Cleanup dummy main.rs files
RUN find . -path "*/src/main.rs" -delete
@@ -33,10 +30,7 @@ ARG COMMIT_DATE
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE}
COPY . .
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
$HOME/.cargo/bin/cargo build --release
RUN $HOME/.cargo/bin/cargo build --release
# Run
FROM alpine:3.14
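The `RUN` step that tests `TARGETPLATFORM` exports `JEMALLOC_SYS_WITH_LG_PAGE=16` only when the image is built for `linux/arm64`. Outside Docker the same logic looks like the sketch below; in a real multi-platform build, BuildKit injects `TARGETPLATFORM` instead of it being set by hand:

```bash
TARGETPLATFORM='linux/arm64'    # supplied by `docker buildx` in the real build
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then
    export JEMALLOC_SYS_WITH_LG_PAGE=16    # assume 64 KiB pages on arm64
fi
echo "JEMALLOC_SYS_WITH_LG_PAGE=${JEMALLOC_SYS_WITH_LG_PAGE:-unset}"
```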


@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2022 Meilisearch
Copyright (c) 2019-2021 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


@@ -1,8 +1,8 @@
<p align="center">
<img src="assets/logo.svg" alt="Meilisearch" width="200" height="200" />
<img src="assets/logo.svg" alt="MeiliSearch" width="200" height="200" />
</p>
<h1 align="center">Meilisearch</h1>
<h1 align="center">MeiliSearch</h1>
<h4 align="center">
<a href="https://www.meilisearch.com">Website</a> |
@@ -15,17 +15,17 @@
</h4>
<p align="center">
<a href="https://github.com/meilisearch/meilisearch/actions"><img src="https://github.com/meilisearch/meilisearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://slack.meilisearch.com"><img src="https://img.shields.io/badge/slack-meilisearch-blue.svg?logo=slack" alt="Slack"></a>
<a href="https://github.com/meilisearch/meilisearch/discussions" alt="Discussions"><img src="https://img.shields.io/badge/github-discussions-red" /></a>
<a href="https://github.com/meilisearch/MeiliSearch/actions"><img src="https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
<a href="https://deps.rs/repo/github/meilisearch/MeiliSearch"><img src="https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/MeiliSearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://slack.meilisearch.com"><img src="https://img.shields.io/badge/slack-MeiliSearch-blue.svg?logo=slack" alt="Slack"></a>
<a href="https://github.com/meilisearch/MeiliSearch/discussions" alt="Discussions"><img src="https://img.shields.io/badge/github-discussions-red" /></a>
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
</p>
<p align="center">⚡ Lightning Fast, Ultra Relevant, and Typo-Tolerant Search Engine 🔍</p>
**Meilisearch** is a powerful, fast, open-source, easy to use and deploy search engine. Both searching and indexing are highly customizable. Features such as typo-tolerance, filters, and synonyms are provided out-of-the-box.
**MeiliSearch** is a powerful, fast, open-source, easy to use and deploy search engine. Both searching and indexing are highly customizable. Features such as typo-tolerance, filters, and synonyms are provided out-of-the-box.
For more information about features go to [our documentation](https://docs.meilisearch.com/).
<p align="center">
@@ -61,13 +61,13 @@ meilisearch
docker run -p 7700:7700 -v "$(pwd)/data.ms:/data.ms" getmeili/meilisearch
```
#### Announcing a cloud-hosted Meilisearch
#### Announcing a cloud-hosted MeiliSearch
Join the closed beta by filling out this [form](https://meilisearch.typeform.com/to/FtnzvZfh).
#### Try Meilisearch in our Sandbox
#### Try MeiliSearch in our Sandbox
Create a Meilisearch instance in [Meilisearch Sandbox](https://sandbox.meilisearch.com/). This instance is free, and will be active for 48 hours.
Create a MeiliSearch instance in [MeiliSearch Sandbox](https://sandbox.meilisearch.com/). This instance is free, and will be active for 48 hours.
#### Run on Digital Ocean
@@ -99,8 +99,8 @@ curl -L https://install.meilisearch.com | sh
If you have the latest stable Rust toolchain installed on your local system, clone the repository and change it to your working directory.
```bash
git clone https://github.com/meilisearch/meilisearch.git
cd meilisearch
git clone https://github.com/meilisearch/MeiliSearch.git
cd MeiliSearch
cargo run --release
```
@@ -161,19 +161,19 @@ curl 'http://127.0.0.1:7700/indexes/movies/search?q=botman+robin&limit=2' | jq
#### Use the Web Interface
We also deliver an **out-of-the-box [web interface](https://github.com/meilisearch/mini-dashboard)** in which you can test Meilisearch interactively.
We also deliver an **out-of-the-box [web interface](https://github.com/meilisearch/mini-dashboard)** in which you can test MeiliSearch interactively.
You can access the web interface in your web browser at the root of the server. The default URL is [http://127.0.0.1:7700](http://127.0.0.1:7700). All you need to do is open your web browser and enter Meilisearch's address to visit it. This will lead you to a web page with a search bar that will allow you to search in the selected index.
You can access the web interface in your web browser at the root of the server. The default URL is [http://127.0.0.1:7700](http://127.0.0.1:7700). All you need to do is open your web browser and enter MeiliSearch's address to visit it. This will lead you to a web page with a search bar that will allow you to search in the selected index.
| [See the gif above](#demo)
## Documentation
Now that your Meilisearch server is up and running, you can learn more about how to tune your search engine in [the documentation](https://docs.meilisearch.com).
Now that your MeiliSearch server is up and running, you can learn more about how to tune your search engine in [the documentation](https://docs.meilisearch.com).
## Contributing
Hey! We're glad you're thinking about contributing to Meilisearch! Feel free to pick an [issue labeled as `good first issue`](https://github.com/meilisearch/meilisearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22), and to ask any question you need. Some points might not be clear and we are available to help you!
Hey! We're glad you're thinking about contributing to MeiliSearch! Feel free to pick an [issue labeled as `good first issue`](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22), and to ask any question you need. Some points might not be clear and we are available to help you!
Also, we recommend following the [CONTRIBUTING](./CONTRIBUTING.md) to create your PR.
@@ -184,8 +184,8 @@ The code in this repository is only concerned with managing multiple indexes, ha
Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `tokenizer` library](https://github.com/meilisearch/tokenizer/).
## Telemetry
Meilisearch collects anonymous data regarding general usage.
This helps us better understand developers' usage of Meilisearch features.
MeiliSearch collects anonymous data regarding general usage.
This helps us better understand developers' usage of MeiliSearch features.
To find out more on what information we're retrieving, please see our documentation on [Telemetry](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html).
@@ -193,7 +193,7 @@ This program is optional, you can disable these analytics by using the `MEILI_NO
## Feature request
The feature requests are not managed in this repository. Please visit our [dedicated repository](https://github.com/meilisearch/product) to see our work about the Meilisearch product.
The feature requests are not managed in this repository. Please visit our [dedicated repository](https://github.com/meilisearch/product) to see our work about the MeiliSearch product.
If you have a feature request or any feedback about an existing feature, please open [a discussion](https://github.com/meilisearch/product/discussions).
Also, feel free to participate in the current discussions, we are looking forward to reading your comments.
@@ -202,4 +202,4 @@ Also, feel free to participate in the current discussions, we are looking forwar
Please visit [this page](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html#contact-us).
Meilisearch is developed by [Meili](https://www.meilisearch.com), a young company. To know more about us, you can [read our blog](https://blog.meilisearch.com). Any suggestion or feedback is highly appreciated. Thank you for your support!
MeiliSearch is developed by [Meili](https://www.meilisearch.com), a young company. To know more about us, you can [read our blog](https://blog.meilisearch.com). Any suggestion or feedback is highly appreciated. Thank you for your support!


@@ -1,16 +1,16 @@
# Security
Meilisearch takes the security of our software products and services seriously.
MeiliSearch takes the security of our software products and services seriously.
If you believe you have found a security vulnerability in any Meilisearch-owned repository, please report it to us as described below.
If you believe you have found a security vulnerability in any MeiliSearch-owned repository, please report it to us as described below.
## Supported versions
As long as we are pre-v1.0, only the latest version of Meilisearch will be supported with security updates.
As long as we are pre-v1.0, only the latest version of MeiliSearch will be supported with security updates.
## Reporting security issues
⚠️ Please do not report security vulnerabilities through public GitHub issues. ⚠️
⚠️ Please do not report security vulnerabilities through public GitHub issues. ⚠️
Instead, please kindly email us at security@meilisearch.com


@@ -1,19 +1,17 @@
<svg width="300" height="300" viewBox="0 0 300 300" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M0 237L55.426 96.7678C63.2367 77.0063 82.499 64 103.955 64H137.371L81.9447 204.232C74.1341 223.993 54.8717 237 33.4156 237H0Z" fill="url(#paint0_linear_1_898)"/>
<path d="M81.3123 237L136.738 96.7682C144.549 77.0067 163.811 64.0004 185.267 64.0004H218.683L163.257 204.232C155.446 223.994 136.184 237 114.728 237H81.3123Z" fill="url(#paint1_linear_1_898)"/>
<path d="M162.629 237L218.055 96.7682C225.866 77.0067 245.128 64.0004 266.584 64.0004H300L244.574 204.232C236.763 223.994 217.501 237 196.045 237H162.629Z" fill="url(#paint2_linear_1_898)"/>
<svg width="360" height="360" viewBox="0 0 360 360" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="logo_main">
<rect id="Rectangle" x="107.333" y="0.150146" width="274.315" height="274.315" rx="98.8334" transform="rotate(23 107.333 0.150146)" fill="url(#paint0_linear)"/>
<path id="Rectangle_2" fill-rule="evenodd" clip-rule="evenodd" d="M61.3296 230.199C46.2224 194.608 38.6688 176.813 38.208 160.329C37.5286 136.025 47.0175 112.539 64.3891 95.5282C76.1718 83.9904 93.9669 76.4368 129.557 61.3296C165.147 46.2224 182.943 38.6688 199.427 38.208C223.731 37.5286 247.217 47.0175 264.228 64.3891C275.766 76.1718 283.319 93.9669 298.426 129.557C313.534 165.147 321.087 182.943 321.548 199.427C322.227 223.731 312.738 247.217 295.367 264.228C283.584 275.766 265.789 283.319 230.199 298.426C194.608 313.534 176.813 321.087 160.329 321.548C136.025 322.227 112.539 312.738 95.5282 295.367C83.9903 283.584 76.4368 265.789 61.3296 230.199Z" fill="url(#paint1_linear)"/>
<path id="m" fill-rule="evenodd" clip-rule="evenodd" d="M219.568 130.748C242.363 130.748 259.263 147.451 259.263 174.569V229.001H227.232V179.678C227.232 166.119 220.747 159.634 210.136 159.634C205.223 159.634 200.311 161.796 195.595 167.494C195.791 169.852 195.988 172.21 195.988 174.569V229.001H164.154V179.678C164.154 166.119 157.472 159.634 147.057 159.634C142.145 159.634 137.429 161.992 132.712 168.084V229.001H100.878V133.695H132.712V139.394C139.197 133.892 145.878 130.748 156.49 130.748C168.477 130.748 178.695 135.267 185.769 143.52C195.791 134.678 205.42 130.748 219.568 130.748Z" fill="white"/>
</g>
<defs>
<linearGradient id="paint0_linear_1_898" x1="300.001" y1="50.7858" x2="1.63474" y2="221.244" gradientUnits="userSpaceOnUse">
<stop stop-color="#FF5CAA"/>
<stop offset="1" stop-color="#FF4E62"/>
<linearGradient id="paint0_linear" x1="-13.6248" y1="129.208" x2="244.49" y2="403.522" gradientUnits="userSpaceOnUse">
<stop stop-color="#E41359"/>
<stop offset="1" stop-color="#F23C79"/>
</linearGradient>
<linearGradient id="paint1_linear_1_898" x1="300.001" y1="50.7858" x2="1.63474" y2="221.244" gradientUnits="userSpaceOnUse">
<stop stop-color="#FF5CAA"/>
<stop offset="1" stop-color="#FF4E62"/>
</linearGradient>
<linearGradient id="paint2_linear_1_898" x1="300.001" y1="50.7858" x2="1.63474" y2="221.244" gradientUnits="userSpaceOnUse">
<stop stop-color="#FF5CAA"/>
<stop offset="1" stop-color="#FF4E62"/>
<linearGradient id="paint1_linear" x1="11.0088" y1="111.65" x2="111.65" y2="348.747" gradientUnits="userSpaceOnUse">
<stop stop-color="#24222F"/>
<stop offset="1" stop-color="#2B2937"/>
</linearGradient>
</defs>
</svg>

Image changed: 1.3 KiB before, 2.0 KiB after.


@@ -73,10 +73,10 @@ semverLT() {
get_latest() {
temp_file='temp_file' # temp_file needed because the grep would start before the download is over
if [ -z "$GITHUB_PAT" ]; then
curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file" || return 1
if [ -z "$GITHUB_PAT" ]; then
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file" || return 1
else
curl -H "Authorization: token $GITHUB_PAT" -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file" || return 1
curl -H "Authorization: token $GITHUB_PAT" -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file" || return 1
fi
releases=$(cat "$temp_file" | \
@@ -120,7 +120,7 @@ get_latest() {
done
rm -f "$temp_file"
return 0
echo $latest
}
# Gets the OS by setting the $os variable
@@ -148,18 +148,11 @@ get_os() {
get_archi() {
architecture=$(uname -m)
case "$architecture" in
'x86_64' | 'amd64' )
'x86_64' | 'amd64' | 'arm64')
archi='amd64'
;;
'arm64')
if [ $os = 'macos' ]; then # MacOS M1
archi='amd64'
else
archi='aarch64'
fi
;;
'aarch64')
archi='aarch64'
archi='armv8'
;;
*)
return 1
@@ -168,7 +161,7 @@ get_archi() {
}
success_usage() {
printf "$GREEN%s\n$DEFAULT" "Meilisearch $latest binary successfully downloaded as '$binary_name' file."
printf "$GREEN%s\n$DEFAULT" "MeiliSearch $latest binary successfully downloaded as '$binary_name' file."
echo ''
echo 'Run it:'
echo ' $ ./meilisearch'
@@ -176,65 +169,47 @@ success_usage() {
echo ' $ ./meilisearch --help'
}
not_available_failure_usage() {
printf "$RED%s\n$DEFAULT" 'ERROR: Meilisearch binary is not available for your OS distribution or your architecture yet.'
failure_usage() {
printf "$RED%s\n$DEFAULT" 'ERROR: MeiliSearch binary is not available for your OS distribution or your architecture yet.'
echo ''
echo 'However, you can easily compile the binary from the source files.'
echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
}
fetch_release_failure_usage() {
echo ''
printf "$RED%s\n$DEFAULT" 'ERROR: Impossible to get the latest stable version of Meilisearch.'
echo 'Please let us know about this issue: https://github.com/meilisearch/meilisearch/issues/new/choose'
}
# MAIN
# Fill $latest variable
if ! get_latest; then
fetch_release_failure_usage # TO CHANGE
exit 1
fi
latest="$(get_latest)"
if [ "$latest" = '' ]; then
fetch_release_failure_usage
echo ''
echo 'Impossible to get the latest stable version of MeiliSearch.'
echo 'Please let us know about this issue: https://github.com/meilisearch/MeiliSearch/issues/new/choose'
exit 1
fi
# Fill $os variable
if ! get_os; then
not_available_failure_usage
failure_usage
exit 1
fi
# Fill $archi variable
if ! get_archi; then
not_available_failure_usage
failure_usage
exit 1
fi
echo "Downloading Meilisearch binary $latest for $os, architecture $archi..."
echo "Downloading MeiliSearch binary $latest for $os, architecture $archi..."
case "$os" in
'windows')
release_file="meilisearch-$os-$archi.exe"
binary_name='meilisearch.exe'
binary_name='meilisearch.exe'
;;
*)
release_file="meilisearch-$os-$archi"
binary_name='meilisearch'
*)
release_file="meilisearch-$os-$archi"
binary_name='meilisearch'
esac
# Fetch the Meilisearch binary
link="https://github.com/meilisearch/meilisearch/releases/download/$latest/$release_file"
curl --fail -OL "$link"
if [ $? -ne 0 ]; then
fetch_release_failure_usage
exit 1
fi
link="https://github.com/meilisearch/MeiliSearch/releases/download/$latest/$release_file"
curl -OL "$link"
mv "$release_file" "$binary_name"
chmod 744 "$binary_name"
success_usage
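The refactor in this hunk replaces `get_latest`'s side effect on a global variable with the usual shell pattern of printing the result and capturing it at the call site, which is also what makes the empty-string error check possible. A minimal sketch of the pattern, with a stub body standing in for the real fetch-and-filter logic:

```bash
get_latest() {
    latest='v0.25.2'    # stub; the real function fetches and filters releases
    echo "$latest"      # print the result instead of relying on a global
}

latest="$(get_latest)"    # capture at the call site
if [ "$latest" = '' ]; then
    echo 'Impossible to get the latest stable version.' >&2
    exit 1
fi
echo "Downloading $latest"
```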


@@ -1,15 +1,15 @@
[package]
name = "meilisearch-auth"
version = "0.26.0"
edition = "2021"
version = "0.25.0"
edition = "2018"
[dependencies]
enum-iterator = "0.7.0"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
sha2 = "0.9.6"
chrono = { version = "0.4.19", features = ["serde"] }
meilisearch-error = { path = "../meilisearch-error" }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
rand = "0.8.4"
serde = { version = "1.0.130", features = ["derive"] }
thiserror = "1.0.28"


@@ -10,10 +10,7 @@ const KEYS_PATH: &str = "keys";
impl AuthController {
pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let mut store = HeedAuthStore::new(&src)?;
// do not attempt to close the database on drop!
store.set_drop_on_close(false);
let store = HeedAuthStore::new(&src)?;
let keys_file_path = dst.as_ref().join(KEYS_PATH);


@@ -10,13 +10,13 @@ pub type Result<T> = std::result::Result<T, AuthControllerError>;
pub enum AuthControllerError {
#[error("`{0}` field is mandatory.")]
MissingParameter(&'static str),
#[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")]
#[error("actions field value `{0}` is invalid. It should be an array of string representing action names.")]
InvalidApiKeyActions(Value),
#[error("`indexes` field value `{0}` is invalid. It should be an array of string representing index names.")]
#[error("indexes field value `{0}` is invalid. It should be an array of string representing index names.")]
InvalidApiKeyIndexes(Value),
#[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")]
#[error("expiresAt field value `{0}` is invalid. It should be in ISO-8601 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS'.")]
InvalidApiKeyExpiresAt(Value),
#[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")]
#[error("description field value `{0}` is invalid. It should be a string or specified as a null value.")]
InvalidApiKeyDescription(Value),
#[error("API key `{0}` not found.")]
ApiKeyNotFound(String),


@@ -1,12 +1,10 @@
use crate::action::Action;
use crate::error::{AuthControllerError, Result};
use crate::store::{KeyId, KEY_ID_LENGTH};
use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
use rand::Rng;
use serde::{Deserialize, Serialize};
use serde_json::{from_value, Value};
use time::format_description::well_known::Rfc3339;
use time::macros::{format_description, time};
use time::{Date, OffsetDateTime, PrimitiveDateTime};
#[derive(Debug, Deserialize, Serialize)]
pub struct Key {
@@ -15,24 +13,20 @@ pub struct Key {
pub id: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<String>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
pub expires_at: Option<DateTime<Utc>>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl Key {
pub fn create_from_value(value: Value) -> Result<Self> {
let description = match value.get("description") {
Some(Value::Null) => None,
Some(des) => Some(
let description = value
.get("description")
.map(|des| {
from_value(des.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))?,
),
None => None,
};
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))
})
.transpose()?;
let id = generate_id();
@@ -57,8 +51,8 @@ impl Key {
.map(parse_expiration_date)
.ok_or(AuthControllerError::MissingParameter("expiresAt"))??;
let created_at = OffsetDateTime::now_utc();
let updated_at = created_at;
let created_at = Utc::now();
let updated_at = Utc::now();
Ok(Self {
description,
@@ -94,26 +88,24 @@ impl Key {
self.expires_at = parse_expiration_date(exp)?;
}
self.updated_at = OffsetDateTime::now_utc();
self.updated_at = Utc::now();
Ok(())
}
pub(crate) fn default_admin() -> Self {
let now = OffsetDateTime::now_utc();
Self {
description: Some("Default Admin API Key (Use it for all other operations. Caution! Do not use it on a public frontend)".to_string()),
id: generate_id(),
actions: vec![Action::All],
indexes: vec!["*".to_string()],
expires_at: None,
created_at: now,
updated_at: now,
created_at: Utc::now(),
updated_at: Utc::now(),
}
}
pub(crate) fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
Self {
description: Some(
"Default Search API Key (Use it to search from the frontend)".to_string(),
@@ -122,8 +114,8 @@ impl Key {
actions: vec![Action::Search],
indexes: vec!["*".to_string()],
expires_at: None,
created_at: now,
updated_at: now,
created_at: Utc::now(),
updated_at: Utc::now(),
}
}
}
@@ -141,34 +133,22 @@ fn generate_id() -> [u8; KEY_ID_LENGTH] {
bytes
}
fn parse_expiration_date(value: &Value) -> Result<Option<OffsetDateTime>> {
fn parse_expiration_date(value: &Value) -> Result<Option<DateTime<Utc>>> {
match value {
Value::String(string) => OffsetDateTime::parse(string, &Rfc3339)
Value::String(string) => DateTime::parse_from_rfc3339(string)
.map(|d| d.into())
.or_else(|_| {
PrimitiveDateTime::parse(
string,
format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]"
),
).map(|datetime| datetime.assume_utc())
NaiveDateTime::parse_from_str(string, "%Y-%m-%dT%H:%M:%S")
.map(|naive| DateTime::from_utc(naive, Utc))
})
.or_else(|_| {
PrimitiveDateTime::parse(
string,
format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]"
),
).map(|datetime| datetime.assume_utc())
})
.or_else(|_| {
Date::parse(string, format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day]"
)).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc())
NaiveDate::parse_from_str(string, "%Y-%m-%d")
.map(|naive| DateTime::from_utc(naive.and_hms(0, 0, 0), Utc))
})
.map_err(|_| AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
// check if the key is already expired.
.and_then(|d| {
if d > OffsetDateTime::now_utc() {
if d > Utc::now() {
Ok(d)
} else {
Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
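Both versions of `parse_expiration_date` try a fixed list of formats before rejecting the value: a full RFC 3339 timestamp, a naive `YYYY-MM-DDTHH:MM:SS` datetime assumed to be UTC (the `time` version also accepts a space-separated variant), and a bare `YYYY-MM-DD` date expanded to midnight UTC. A quick way to eyeball the accepted shapes, with made-up values (a recent GNU `date` happens to parse all three, though far more loosely than the Rust code):

```bash
# The three expiresAt shapes the fallback chain accepts
for d in '2024-01-01T00:00:00Z' '2024-01-01T00:00:00' '2024-01-01'; do
    date -u -d "$d" '+%Y-%m-%dT%H:%M:%SZ' && echo "  ^ parsed from: $d"
done
```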


@@ -4,20 +4,17 @@ pub mod error;
mod key;
mod store;
use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::str::from_utf8;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use chrono::Utc;
use serde_json::Value;
use sha2::{Digest, Sha256};
use time::OffsetDateTime;
pub use action::{actions, Action};
use error::{AuthControllerError, Result};
pub use key::Key;
pub use store::open_auth_store_env;
use store::HeedAuthStore;
#[derive(Clone)]
@@ -40,28 +37,24 @@ impl AuthController {
})
}
pub fn create_key(&self, value: Value) -> Result<Key> {
pub async fn create_key(&self, value: Value) -> Result<Key> {
let key = Key::create_from_value(value)?;
self.store.put_api_key(key)
}
pub fn update_key(&self, key: impl AsRef<str>, value: Value) -> Result<Key> {
let mut key = self.get_key(key)?;
pub async fn update_key(&self, key: impl AsRef<str>, value: Value) -> Result<Key> {
let mut key = self.get_key(key).await?;
key.update_from_value(value)?;
self.store.put_api_key(key)
}
pub fn get_key(&self, key: impl AsRef<str>) -> Result<Key> {
pub async fn get_key(&self, key: impl AsRef<str>) -> Result<Key> {
self.store
.get_api_key(&key)?
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(key.as_ref().to_string()))
}
pub fn get_key_filters(
&self,
key: impl AsRef<str>,
search_rules: Option<SearchRules>,
) -> Result<AuthFilter> {
pub fn get_key_filters(&self, key: impl AsRef<str>) -> Result<AuthFilter> {
let mut filters = AuthFilter::default();
if self
.master_key
@@ -74,22 +67,7 @@ impl AuthController {
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(key.as_ref().to_string()))?;
if !key.indexes.iter().any(|i| i.as_str() == "*") {
filters.search_rules = match search_rules {
// Intersect search_rules with parent key authorized indexes.
Some(search_rules) => SearchRules::Map(
key.indexes
.into_iter()
.filter_map(|index| {
search_rules
.get_index_search_rules(&index)
.map(|index_search_rules| (index, Some(index_search_rules)))
})
.collect(),
),
None => SearchRules::Set(key.indexes.into_iter().collect()),
};
} else if let Some(search_rules) = search_rules {
filters.search_rules = search_rules;
filters.indexes = Some(key.indexes);
}
filters.allow_index_creation = key
@@ -101,11 +79,11 @@ impl AuthController {
Ok(filters)
}
pub fn list_keys(&self) -> Result<Vec<Key>> {
pub async fn list_keys(&self) -> Result<Vec<Key>> {
self.store.list_api_keys()
}
pub fn delete_key(&self, key: impl AsRef<str>) -> Result<()> {
pub async fn delete_key(&self, key: impl AsRef<str>) -> Result<()> {
if self.store.delete_api_key(&key)? {
Ok(())
} else {
@@ -119,149 +97,50 @@ impl AuthController {
self.master_key.as_ref()
}
/// Generate a valid key from a key id using the current master key.
/// Returns None if no master key has been set.
pub fn generate_key(&self, id: &str) -> Option<String> {
self.master_key
.as_ref()
.map(|master_key| generate_key(master_key.as_bytes(), id))
}
/// Check if the provided key is authorized to make a specific action
/// without checking if the key is valid.
pub fn is_key_authorized(
&self,
key: &[u8],
action: Action,
index: Option<&str>,
) -> Result<bool> {
match self
.store
// check if the key has access to all indexes.
.get_expiration_date(key, action, None)?
.or(match index {
// else check if the key has access to the requested index.
Some(index) => {
self.store
.get_expiration_date(key, action, Some(index.as_bytes()))?
pub fn authenticate(&self, token: &[u8], action: Action, index: Option<&[u8]>) -> Result<bool> {
if let Some(master_key) = &self.master_key {
if let Some((id, exp)) = self
.store
// check if the key has access to all indexes.
.get_expiration_date(token, action, None)?
.or(match index {
// else check if the key has access to the requested index.
Some(index) => self.store.get_expiration_date(token, action, Some(index))?,
// or to any index if no index has been requested.
None => self.store.prefix_first_expiration_date(token, action)?,
})
{
let id = from_utf8(&id)?;
if exp.map_or(true, |exp| Utc::now() < exp)
&& generate_key(master_key.as_bytes(), id).as_bytes() == token
{
return Ok(true);
}
// or to any index if no index has been requested.
None => self.store.prefix_first_expiration_date(key, action)?,
}) {
// check expiration date.
Some(Some(exp)) => Ok(OffsetDateTime::now_utc() < exp),
// no expiration date.
Some(None) => Ok(true),
// action or index forbidden.
None => Ok(false),
}
}
/// Check if the provided key is valid
/// without checking if the key is authorized to make a specific action.
pub fn is_key_valid(&self, key: &[u8]) -> Result<bool> {
if let Some(id) = self.store.get_key_id(key) {
let id = from_utf8(&id)?;
if let Some(generated) = self.generate_key(id) {
return Ok(generated.as_bytes() == key);
}
}
Ok(false)
}
/// Check if the provided key is valid
/// and is authorized to make a specific action.
pub fn authenticate(&self, key: &[u8], action: Action, index: Option<&str>) -> Result<bool> {
if self.is_key_authorized(key, action, index)? {
self.is_key_valid(key)
} else {
Ok(false)
}
}
}
pub struct AuthFilter {
pub search_rules: SearchRules,
pub indexes: Option<Vec<String>>,
pub allow_index_creation: bool,
}
impl Default for AuthFilter {
fn default() -> Self {
Self {
search_rules: SearchRules::default(),
indexes: None,
allow_index_creation: true,
}
}
}
/// Transparent wrapper around a list of allowed indexes with the search rules to apply for each.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum SearchRules {
Set(HashSet<String>),
Map(HashMap<String, Option<IndexSearchRules>>),
}
impl Default for SearchRules {
fn default() -> Self {
Self::Set(Some("*".to_string()).into_iter().collect())
}
}
impl SearchRules {
pub fn is_index_authorized(&self, index: &str) -> bool {
match self {
Self::Set(set) => set.contains("*") || set.contains(index),
Self::Map(map) => map.contains_key("*") || map.contains_key(index),
}
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
match self {
Self::Set(set) => {
if set.contains("*") || set.contains(index) {
Some(IndexSearchRules::default())
} else {
None
}
}
Self::Map(map) => map
.get(index)
.or_else(|| map.get("*"))
.map(|isr| isr.clone().unwrap_or_default()),
}
}
}
impl IntoIterator for SearchRules {
type Item = (String, IndexSearchRules);
type IntoIter = Box<dyn Iterator<Item = Self::Item>>;
fn into_iter(self) -> Self::IntoIter {
match self {
Self::Set(array) => {
Box::new(array.into_iter().map(|i| (i, IndexSearchRules::default())))
}
Self::Map(map) => {
Box::new(map.into_iter().map(|(i, isr)| (i, isr.unwrap_or_default())))
}
}
}
}
/// Contains the rules to apply on the top of the search query for a specific index.
///
/// filter: search filter to apply in addition to query filters.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct IndexSearchRules {
pub filter: Option<serde_json::Value>,
}
fn generate_key(master_key: &[u8], keyid: &str) -> String {
let key = [keyid.as_bytes(), master_key].concat();
pub fn generate_key(master_key: &[u8], uid: &str) -> String {
let key = [uid.as_bytes(), master_key].concat();
let sha = Sha256::digest(&key);
format!("{}{:x}", keyid, sha)
format!("{}{:x}", uid, sha)
}
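`generate_key` derives an API key deterministically: concatenate the key's uid with the master key, hash with SHA-256, and emit the uid followed by the hex digest. `authenticate` can then re-derive the key from the stored id and compare, and the fixed-length uid prefix is what the store later slices back off to look a key up. A shell sketch of the same construction, with hypothetical values:

```bash
# key = uid ++ hex(sha256(uid ++ master_key)); uid and master key are made up
uid='0123456789abcdef'
master_key='MASTER_KEY'
sha="$(printf '%s%s' "$uid" "$master_key" | sha256sum | awk '{print $1}')"
key="${uid}${sha}"
echo "$key"
echo "recovered uid: ${key:0:${#uid}}"    # fixed-length prefix, as get_key_id does
```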
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {


@@ -6,11 +6,10 @@ use std::convert::TryInto;
use std::fs::create_dir_all;
use std::path::Path;
use std::str;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use heed::{Database, Env, EnvOpenOptions, RwTxn};
use time::OffsetDateTime;
use super::error::Result;
use super::{Action, Key};
@@ -25,32 +24,19 @@ pub type KeyId = [u8; KEY_ID_LENGTH];
#[derive(Clone)]
pub struct HeedAuthStore {
env: Arc<Env>,
env: Env,
keys: Database<ByteSlice, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
}
impl Drop for HeedAuthStore {
fn drop(&mut self) {
if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 {
self.env.as_ref().clone().prepare_for_closing();
}
}
}
pub fn open_auth_store_env(path: &Path) -> heed::Result<heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
options.open(path)
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<DateTime<Utc>>>>,
}
impl HeedAuthStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
let env = options.open(path)?;
let keys = env.create_database(Some(KEY_DB_NAME))?;
let action_keyid_index_expiration =
env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
@@ -58,14 +44,9 @@ impl HeedAuthStore {
env,
keys,
action_keyid_index_expiration,
should_close_on_drop: true,
})
}
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}
pub fn is_empty(&self) -> Result<bool> {
let rtxn = self.env.read_txn()?;
@@ -113,18 +94,18 @@ impl HeedAuthStore {
pub fn get_api_key(&self, key: impl AsRef<str>) -> Result<Option<Key>> {
let rtxn = self.env.read_txn()?;
match self.get_key_id(key.as_ref().as_bytes()) {
Some(id) => self.keys.get(&rtxn, &id).map_err(|e| e.into()),
match try_split_array_at::<_, KEY_ID_LENGTH>(key.as_ref().as_bytes()) {
Some((id, _)) => self.keys.get(&rtxn, id).map_err(|e| e.into()),
None => Ok(None),
}
}
pub fn delete_api_key(&self, key: impl AsRef<str>) -> Result<bool> {
let mut wtxn = self.env.write_txn()?;
let existing = match self.get_key_id(key.as_ref().as_bytes()) {
Some(id) => {
let existing = self.keys.delete(&mut wtxn, &id)?;
self.delete_key_from_inverted_db(&mut wtxn, &id)?;
let existing = match try_split_array_at(key.as_ref().as_bytes()) {
Some((id, _)) => {
let existing = self.keys.delete(&mut wtxn, id)?;
self.delete_key_from_inverted_db(&mut wtxn, id)?;
existing
}
None => false,
@@ -150,12 +131,15 @@ impl HeedAuthStore {
key: &[u8],
action: Action,
index: Option<&[u8]>,
) -> Result<Option<Option<OffsetDateTime>>> {
) -> Result<Option<(KeyId, Option<DateTime<Utc>>)>> {
let rtxn = self.env.read_txn()?;
match self.get_key_id(key) {
Some(id) => {
let tuple = (&id, &action, index);
Ok(self.action_keyid_index_expiration.get(&rtxn, &tuple)?)
match try_split_array_at::<_, KEY_ID_LENGTH>(key) {
Some((id, _)) => {
let tuple = (id, &action, index);
Ok(self
.action_keyid_index_expiration
.get(&rtxn, &tuple)?
.map(|expiration| (*id, expiration)))
}
None => Ok(None),
}
@@ -165,26 +149,22 @@ impl HeedAuthStore {
&self,
key: &[u8],
action: Action,
) -> Result<Option<Option<OffsetDateTime>>> {
) -> Result<Option<(KeyId, Option<DateTime<Utc>>)>> {
let rtxn = self.env.read_txn()?;
match self.get_key_id(key) {
Some(id) => {
let tuple = (&id, &action, None);
match try_split_array_at::<_, KEY_ID_LENGTH>(key) {
Some((id, _)) => {
let tuple = (id, &action, None);
Ok(self
.action_keyid_index_expiration
.prefix_iter(&rtxn, &tuple)?
.next()
.transpose()?
.map(|(_, expiration)| expiration))
.map(|(_, expiration)| (*id, expiration)))
}
None => Ok(None),
}
}
pub fn get_key_id(&self, key: &[u8]) -> Option<KeyId> {
try_split_array_at::<_, KEY_ID_LENGTH>(key).map(|(id, _)| *id)
}
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self
.action_keyid_index_expiration


@@ -1,11 +1,12 @@
[package]
name = "meilisearch-error"
version = "0.26.0"
version = "0.25.2"
authors = ["marin <postma.marin@protonmail.com>"]
edition = "2021"
edition = "2018"
[dependencies]
actix-web = { version = "4", default-features = false }
actix-http = "=3.0.0-beta.10"
actix-web = "4.0.0-beta.9"
proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true }
serde = { version = "1.0.130", features = ["derive"] }


@@ -1,6 +1,7 @@
use std::fmt;
use actix_web::{self as aweb, http::StatusCode, HttpResponseBuilder};
use actix_http::{body::Body, http::StatusCode};
use actix_web::{self as aweb, HttpResponseBuilder};
use serde::{Deserialize, Serialize};
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
@@ -58,7 +59,7 @@ where
}
impl aweb::error::ResponseError for ResponseError {
fn error_response(&self) -> aweb::HttpResponse {
fn error_response(&self) -> aweb::HttpResponse<Body> {
let json = serde_json::to_vec(self).unwrap();
HttpResponseBuilder::new(self.status_code())
.content_type("application/json")


@@ -1,17 +1,17 @@
[package]
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
edition = "2021"
description = "MeiliSearch HTTP server"
edition = "2018"
license = "MIT"
name = "meilisearch-http"
version = "0.26.0"
version = "0.25.2"
[[bin]]
name = "meilisearch"
path = "src/main.rs"
[build-dependencies]
static-files = { version = "0.2.1", optional = true }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
anyhow = { version = "1.0.43", optional = true }
cargo_toml = { version = "0.9", optional = true }
hex = { version = "0.4.3", optional = true }
@@ -22,16 +22,19 @@ vergen = { version = "5.1.15", default-features = false, features = ["git"] }
zip = { version = "0.5.13", optional = true }
[dependencies]
actix-cors = "0.6"
actix-web = { version = "4", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" }
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
# TODO: specifying this dependency so semver doesn't bump to next beta
actix-tls = "=3.0.0-beta.5"
anyhow = { version = "1.0.43", features = ["backtrace"] }
arc-swap = "1.3.2"
async-stream = "0.3.2"
async-trait = "0.1.51"
bstr = "0.2.17"
byte-unit = { version = "4.0.12", default-features = false, features = ["std", "serde"] }
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
bytes = "1.1.0"
chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.1"
either = "1.6.1"
env_logger = "0.9.0"
@@ -42,9 +45,7 @@ futures-util = "0.3.17"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
iso8601-duration = "0.1.0"
itertools = "0.10.1"
jsonwebtoken = "7"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" }
@@ -54,30 +55,27 @@ num_cpus = "1.13.0"
obkv = "0.2.0"
once_cell = "1.8.0"
parking_lot = "0.11.2"
pin-project = "1.0.8"
platform-dirs = "0.3.0"
rand = "0.8.4"
rayon = "1.5.1"
regex = "1.5.4"
rustls = "0.20.2"
rustls-pemfile = "0.2"
segment = { version = "0.2.0", optional = true }
rustls = "0.19.1"
segment = { version = "0.1.2", optional = true }
serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
sha2 = "0.9.6"
siphasher = "0.3.7"
slice-group-by = "0.2.6"
static-files = { version = "0.2.1", optional = true }
clap = { version = "3.0", features = ["derive", "env"] }
structopt = "0.3.25"
sysinfo = "0.20.2"
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.11.0", features = ["full"] }
tokio-stream = "0.1.7"
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
pin-project-lite = "0.2.8"
[dev-dependencies]
actix-rt = "2.2.0"
@@ -90,7 +88,6 @@ urlencoding = "2.1.0"
[features]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",
"hex",
@@ -106,5 +103,5 @@ default = ["analytics", "mini-dashboard"]
tikv-jemallocator = "0.4.1"
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip"
sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.7/build.zip"
sha1 = "e2feedf271917c4b7b88998eff5aaaea1d3925b9"


@@ -16,11 +16,11 @@ mod mini_dashboard {
use std::io::{Cursor, Read, Write};
use std::path::PathBuf;
use actix_web_static_files::resource_dir;
use anyhow::Context;
use cargo_toml::Manifest;
use reqwest::blocking::get;
use sha1::{Digest, Sha1};
use static_files::resource_dir;
pub fn setup_mini_dashboard() -> anyhow::Result<()> {
let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());


@@ -29,12 +29,12 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// The MeiliSearch config dir:
/// `~/.config/MeiliSearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
/// `%APPDATA` (= `C:\Users\%USERNAME%\AppData\Roaming`) on windows.
static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir));
Lazy::new(|| AppDirs::new(Some("MeiliSearch"), false).map(|appdir| appdir.config_dir));
fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
db_path
@@ -44,13 +44,13 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
path.join("instance-uid")
.display()
.to_string()
.replace('/', "-")
.replace("/", "-")
})
.zip(MEILISEARCH_CONFIG_PATH.as_ref())
.map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
}
/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
/// Look for the instance-uid in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-instance-uid`
fn find_user_id(db_path: &Path) -> Option<String> {
fs::read_to_string(db_path.join("instance-uid"))
.ok()

View File

@@ -1,13 +1,12 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use http::header::CONTENT_TYPE;
use meilisearch_auth::SearchRules;
use meilisearch_lib::index::{SearchQuery, SearchResult};
use meilisearch_lib::index_controller::Stats;
use meilisearch_lib::MeiliSearch;
@@ -17,7 +16,6 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde_json::{json, Value};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
@@ -79,7 +77,7 @@ impl SegmentAnalytics {
let user = User::UserId { user_id };
let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
// If Meilisearch is Launched for the first time:
// If MeiliSearch is Launched for the first time:
// 1. Send an event Launched associated to the user `total_launch`.
// 2. Batch an event Launched with the real instance-id and send it in one hour.
if first_time_run {
@@ -212,30 +210,10 @@ impl Segment {
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
// The infos are all the CLI options, except those containing sensitive information.
// We consider an option sensitive if it contains a path, an address or a key.
let infos = {
// First we see if any sensitive fields were used.
let db_path = opt.db_path != PathBuf::from("./data.ms");
let import_dump = opt.import_dump.is_some();
let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/");
let import_snapshot = opt.import_snapshot.is_some();
let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/");
let http_addr = opt.http_addr != "127.0.0.1:7700";
let mut infos = serde_json::to_value(opt).unwrap();
// Then we overwrite all sensitive fields with a boolean representing whether
// the feature was used or not.
infos["db_path"] = json!(db_path);
infos["import_dump"] = json!(import_dump);
infos["dumps_dir"] = json!(dumps_dir);
infos["import_snapshot"] = json!(import_snapshot);
infos["snapshot_dir"] = json!(snapshots_dir);
infos["http_addr"] = json!(http_addr);
infos
};
let infos = json!({
"env": opt.env.clone(),
"has_snapshot": opt.schedule_snapshot,
});
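For reference, a hedged example of what the sanitizing block produced: launching with only `--db-path /mnt/data.ms` overridden, `infos` carried the full serialized Opt with the sensitive fields overwritten roughly as:
// "db_path": true,          // a non-default path was used; the path itself is never sent
// "import_dump": false,
// "dumps_dir": false,
// "import_snapshot": false,
// "snapshot_dir": false,
// "http_addr": false,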
let number_of_documents = stats
.indexes
@@ -281,7 +259,7 @@ impl Segment {
}
async fn tick(&mut self, meilisearch: MeiliSearch) {
if let Ok(stats) = meilisearch.get_all_stats(&SearchRules::default()).await {
if let Ok(stats) = meilisearch.get_all_stats(&None).await {
let _ = self
.batcher
.push(Identify {
@@ -323,8 +301,6 @@ impl Segment {
#[derive(Default)]
pub struct SearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
@@ -360,8 +336,6 @@ pub struct SearchAggregator {
impl SearchAggregator {
pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
@@ -415,10 +389,6 @@ impl SearchAggregator {
/// Aggregate one [SearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
@@ -492,7 +462,6 @@ impl SearchAggregator {
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
@@ -504,8 +473,6 @@ impl SearchAggregator {
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,
// set to true when at least one request was received
updated: bool,
@@ -524,7 +491,6 @@ impl DocumentsAggregator {
request: &HttpRequest,
) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.updated = true;
ret.user_agents = extract_user_agents(request).into_iter().collect();
@@ -545,10 +511,6 @@ impl DocumentsAggregator {
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
self.updated |= other.updated;
// we can't create a union because there is no `into_union` method
for user_agent in other.user_agents.into_iter() {
@@ -575,7 +537,6 @@ impl DocumentsAggregator {
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,

View File

@@ -5,7 +5,7 @@ pub enum AuthenticationError {
#[error("The Authorization header is missing. It must use the bearer authorization method.")]
MissingAuthorizationHeader,
#[error("The provided API key is invalid.")]
InvalidToken,
InvalidToken(String),
// Triggered on configuration error.
#[error("An internal error has occurred. `Irretrievable state`.")]
IrretrievableState,
@@ -15,7 +15,7 @@ impl ErrorCode for AuthenticationError {
fn error_code(&self) -> Code {
match self {
AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
AuthenticationError::InvalidToken => Code::InvalidToken,
AuthenticationError::InvalidToken(_) => Code::InvalidToken,
AuthenticationError::IrretrievableState => Code::Internal,
}
}

View File

@@ -2,83 +2,28 @@ mod error;
use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;
use actix_web::FromRequest;
use futures::future::err;
use futures::Future;
use meilisearch_error::{Code, ResponseError};
use futures::future::{ok, Ready};
use meilisearch_error::ResponseError;
use error::AuthenticationError;
use meilisearch_auth::{AuthController, AuthFilter};
pub struct GuardedData<P, D> {
pub struct GuardedData<T, D> {
data: D,
filters: AuthFilter,
_marker: PhantomData<P>,
_marker: PhantomData<T>,
}
impl<P, D> GuardedData<P, D> {
impl<T, D> GuardedData<T, D> {
pub fn filters(&self) -> &AuthFilter {
&self.filters
}
async fn auth_bearer(
auth: AuthController,
token: String,
index: Option<String>,
data: Option<D>,
) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
match Self::authenticate(auth, token, index).await? {
Some(filters) => match data {
Some(data) => Ok(Self {
data,
filters,
_marker: PhantomData,
}),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None => Err(AuthenticationError::InvalidToken.into()),
}
}
async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
match Self::authenticate(auth, String::new(), None).await? {
Some(filters) => match data {
Some(data) => Ok(Self {
data,
filters,
_marker: PhantomData,
}),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
async fn authenticate(
auth: AuthController,
token: String,
index: Option<String>,
) -> Result<Option<AuthFilter>, ResponseError>
where
P: Policy + 'static,
{
Ok(tokio::task::spawn_blocking(move || {
P::authenticate(auth, token.as_ref(), index.as_deref())
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))?)
}
}
impl<P, D> Deref for GuardedData<P, D> {
impl<T, D> Deref for GuardedData<T, D> {
type Target = D;
fn deref(&self) -> &Self::Target {
@@ -87,9 +32,11 @@ impl<P, D> Deref for GuardedData<P, D> {
}
impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D> {
type Config = ();
type Error = ResponseError;
type Future = Pin<Box<dyn Future<Output = Result<Self, Self::Error>>>>;
type Future = Ready<Result<Self, Self::Error>>;
fn from_request(
req: &actix_web::HttpRequest,
@@ -105,23 +52,37 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
Some("Bearer") => {
// TODO: find a less hardcoded way?
let index = req.match_info().get("index_uid");
match type_token.next() {
Some(token) => Box::pin(Self::auth_bearer(
auth,
token.to_string(),
index.map(String::from),
req.app_data::<D>().cloned(),
)),
None => Box::pin(err(AuthenticationError::InvalidToken.into())),
let token = type_token.next().unwrap_or("unknown");
match P::authenticate(auth, token, index) {
Some(filters) => match req.app_data::<D>().cloned() {
Some(data) => ok(Self {
data,
filters,
_marker: PhantomData,
}),
None => err(AuthenticationError::IrretrievableState.into()),
},
None => {
let token = token.to_string();
err(AuthenticationError::InvalidToken(token).into())
}
}
}
_otherwise => {
Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
}
_otherwise => err(AuthenticationError::MissingAuthorizationHeader.into()),
},
None => match P::authenticate(auth, "", None) {
Some(filters) => match req.app_data::<D>().cloned() {
Some(data) => ok(Self {
data,
filters,
_marker: PhantomData,
}),
None => err(AuthenticationError::IrretrievableState.into()),
},
None => err(AuthenticationError::MissingAuthorizationHeader.into()),
},
None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
},
None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
None => err(AuthenticationError::IrretrievableState.into()),
}
}
}
@@ -131,22 +92,11 @@ pub trait Policy {
}
pub mod policies {
use jsonwebtoken::{dangerous_insecure_decode, decode, Algorithm, DecodingKey, Validation};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use crate::extractors::authentication::Policy;
use meilisearch_auth::{Action, AuthController, AuthFilter, SearchRules};
use meilisearch_auth::{Action, AuthController, AuthFilter};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_auth::actions;
pub static TENANT_TOKEN_VALIDATION: Lazy<Validation> = Lazy::new(|| Validation {
validate_exp: false,
algorithms: vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512],
..Default::default()
});
pub struct MasterPolicy;
impl Policy for MasterPolicy {
@@ -178,81 +128,15 @@ pub mod policies {
return Some(AuthFilter::default());
}
// Tenant token
if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
{
return Some(filters);
} else if let Some(action) = Action::from_repr(A) {
// API key
// authenticate if token is allowed.
if let Some(action) = Action::from_repr(A) {
let index = index.map(|i| i.as_bytes());
if let Ok(true) = auth.authenticate(token.as_bytes(), action, index) {
return auth.get_key_filters(token, None).ok();
return auth.get_key_filters(token).ok();
}
}
None
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return None;
}
// get token fields without validating it.
let Claims {
search_rules,
exp,
api_key_prefix,
} = dangerous_insecure_decode::<Claims>(token).ok()?.claims;
// Check index access if an index restriction is provided.
if let Some(index) = index {
if !search_rules.is_index_authorized(index) {
return None;
}
}
// Check if token is expired.
if let Some(exp) = exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
// check if parent key is authorized to do the action.
if auth
.is_key_authorized(api_key_prefix.as_bytes(), Action::Search, index)
.ok()?
{
// Check if tenant token is valid.
let key = auth.generate_key(&api_key_prefix)?;
decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&TENANT_TOKEN_VALIDATION,
)
.ok()?;
return auth
.get_key_filters(api_key_prefix, Some(search_rules))
.ok();
}
None
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Claims {
search_rules: SearchRules,
exp: Option<i64>,
api_key_prefix: String,
}
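A hedged example of a decoded tenant-token payload matching this struct (camelCase per the serde rename; all values illustrative):
{
  "searchRules": { "products": { "filter": "user_id = 1" } },
  "exp": 1710000000,
  "apiKeyPrefix": "a1b2c3d4"
}
Note that TENANT_TOKEN_VALIDATION above sets validate_exp: false on purpose: expiry is checked by hand against `exp` before the token is fully validated against the parent key.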
}

View File

@@ -1,4 +1,3 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod sequential_extractor;

View File

@@ -28,6 +28,8 @@ impl Default for PayloadConfig {
}
impl FromRequest for Payload {
type Config = PayloadConfig;
type Error = PayloadError;
type Future = Ready<Result<Payload, Self::Error>>;
@@ -37,7 +39,7 @@ impl FromRequest for Payload {
let limit = req
.app_data::<PayloadConfig>()
.map(|c| c.limit)
.unwrap_or(PayloadConfig::default().limit);
.unwrap_or(Self::Config::default().limit);
ready(Ok(Payload {
payload: payload.take(),
limit,

View File

@@ -1,148 +0,0 @@
#![allow(non_snake_case)]
use std::{future::Future, pin::Pin, task::Poll};
use actix_web::{dev::Payload, FromRequest, Handler, HttpRequest};
use pin_project_lite::pin_project;
/// `SeqHandler` is an actix `Handler` that enforces that extractor errors are returned in the
/// same order as they are defined in the wrapped handler. This is needed because, by default, actix
/// resolves the extractors concurrently, whereas we always need the authentication extractor to
/// throw first.
#[derive(Clone)]
pub struct SeqHandler<H>(pub H);
pub struct SeqFromRequest<T>(T);
/// This macro implements `FromRequest` for handlers of arbitrary arity, except arity one, which is
/// useless anyway.
macro_rules! gen_seq {
($ty:ident; $($T:ident)+) => {
pin_project! {
pub struct $ty<$($T: FromRequest), +> {
$(
#[pin]
$T: ExtractFuture<$T::Future, $T, $T::Error>,
)+
}
}
impl<$($T: FromRequest), +> Future for $ty<$($T),+> {
type Output = Result<SeqFromRequest<($($T),+)>, actix_web::Error>;
fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
let mut this = self.project();
let mut count_fut = 0;
let mut count_finished = 0;
$(
count_fut += 1;
match this.$T.as_mut().project() {
ExtractProj::Future { fut } => match fut.poll(cx) {
Poll::Ready(Ok(output)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Done { output });
}
Poll::Ready(Err(error)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Error { error });
}
Poll::Pending => (),
},
ExtractProj::Done { .. } => count_finished += 1,
ExtractProj::Error { .. } => {
// short circuit if all previous are finished and we had an error.
if count_finished == count_fut {
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Error { error } => {
return Poll::Ready(Err(error.into()))
}
_ => unreachable!("Invalid future state"),
}
} else {
count_finished += 1;
}
}
ExtractProj::Empty => unreachable!("From request polled after being finished. {}", stringify!($T)),
}
)+
if count_fut == count_finished {
let result = (
$(
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Done { output } => output,
ExtractReplaceProj::Error { error } => return Poll::Ready(Err(error.into())),
_ => unreachable!("Invalid future state"),
},
)+
);
Poll::Ready(Ok(SeqFromRequest(result)))
} else {
Poll::Pending
}
}
}
impl<$($T: FromRequest,)+> FromRequest for SeqFromRequest<($($T,)+)> {
type Error = actix_web::Error;
type Future = $ty<$($T),+>;
fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
$ty {
$(
$T: ExtractFuture::Future {
fut: $T::from_request(req, payload),
},
)+
}
}
}
impl<Han, $($T: FromRequest),+> Handler<SeqFromRequest<($($T),+)>> for SeqHandler<Han>
where
Han: Handler<($($T),+)>,
{
type Output = Han::Output;
type Future = Han::Future;
fn call(&self, args: SeqFromRequest<($($T),+)>) -> Self::Future {
self.0.call(args.0)
}
}
};
}
// Not working for a single argument, but then, it is not really necessary.
// gen_seq! { SeqFromRequestFut1; A }
gen_seq! { SeqFromRequestFut2; A B }
gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
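A minimal usage sketch for SeqHandler, assuming a hypothetical two-extractor handler (imports as in the routes modules; it mirrors the SeqHandler(...) route registrations appearing elsewhere in this diff):
// The auth extractor is declared first, so if both extractors fail,
// its error is the one returned to the client.
async fn my_handler(
    _auth: GuardedData<MasterPolicy, AuthController>,
    _body: web::Json<serde_json::Value>,
) -> HttpResponse {
    HttpResponse::Ok().finish()
}
// registration, as done in the routes modules:
// cfg.service(web::resource("/example").route(web::post().to(SeqHandler(my_handler))));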
pin_project! {
#[project = ExtractProj]
#[project_replace = ExtractReplaceProj]
enum ExtractFuture<Fut, Res, Err> {
Future {
#[pin]
fut: Fut,
},
Done {
output: Res,
},
Error {
error: Err,
},
Empty,
}
}

View File

@@ -9,7 +9,7 @@ pub mod helpers;
pub mod option;
pub mod routes;
use std::sync::{atomic::AtomicBool, Arc};
use std::sync::Arc;
use std::time::Duration;
use crate::error::MeilisearchHttpError;
@@ -25,29 +25,16 @@ use extractors::payload::PayloadConfig;
use meilisearch_auth::AuthController;
use meilisearch_lib::MeiliSearch;
pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
let mut meilisearch = MeiliSearch::builder();
// enable autobatching?
let _ = AUTOBATCHING_ENABLED.store(
opt.scheduler_options.enable_auto_batching,
std::sync::atomic::Ordering::Relaxed,
);
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
// snapshot
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone())
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
.set_snapshot_dir(opt.snapshot_dir.clone())
// dump
.set_ignore_missing_dump(opt.ignore_missing_dump)
.set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone());
.set_snapshot_dir(opt.snapshot_dir.clone());
if let Some(ref path) = opt.import_snapshot {
meilisearch.set_import_snapshot(path.clone());
@@ -61,11 +48,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
meilisearch.set_schedule_snapshot();
}
meilisearch.build(
opt.db_path.clone(),
opt.indexer_options.clone(),
opt.scheduler_options.clone(),
)
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
}
pub fn configure_data(
@@ -107,7 +90,7 @@ pub fn configure_data(
#[cfg(feature = "mini-dashboard")]
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
use actix_web::HttpResponse;
use static_files::Resource;
use actix_web_static_files::Resource;
mod generated {
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
@@ -122,13 +105,13 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
} = resource;
// Redirect index.html to /
if path == "index.html" {
config.service(web::resource("/").route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
config.service(web::resource("/").route(
web::get().to(move || HttpResponse::Ok().content_type(mime_type).body(data)),
));
} else {
config.service(web::resource(path).route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
config.service(web::resource(path).route(
web::get().to(move || HttpResponse::Ok().content_type(mime_type).body(data)),
));
}
}
} else {

View File

@@ -2,12 +2,12 @@ use std::env;
use std::sync::Arc;
use actix_web::HttpServer;
use clap::Parser;
use meilisearch_auth::AuthController;
use meilisearch_http::analytics;
use meilisearch_http::analytics::Analytics;
use meilisearch_http::{create_app, setup_meilisearch, Opt};
use meilisearch_lib::MeiliSearch;
use structopt::StructOpt;
#[cfg(target_os = "linux")]
#[global_allocator]
@@ -29,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::parse();
let opt = Opt::from_args();
setup(&opt)?;
@@ -50,7 +50,7 @@ async fn main() -> anyhow::Result<()> {
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
let (analytics, user) = if !opt.no_analytics {
let (analytics, user) = if opt.analytics() {
analytics::SegmentAnalytics::new(&opt, &meilisearch).await
} else {
analytics::MockAnalytics::new(&opt)
@@ -101,14 +101,14 @@ pub fn print_launch_resume(opt: &Opt, user: &str) {
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let ascii_name = r#"
888b d888 d8b 888 d8b 888
8888b d8888 Y8P 888 Y8P 888
88888b.d88888 888 888
888Y88888P888 .d88b. 888 888 888 .d8888b .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 88K d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "Y8888b. 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 X88 Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 88888P' "Y8888 "Y888888 888 "Y8888P 888 888
888b d888 d8b 888 d8b .d8888b. 888
8888b d8888 Y8P 888 Y8P d88P Y88b 888
88888b.d88888 888 Y88b. 888
888Y88888P888 .d88b. 888 888 888 "Y888b. .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 "Y88b. d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "888 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 Y88b d88P Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 "Y8888P" "Y8888 "Y888888 888 "Y8888P 888 888
"#;
eprintln!("{}", ascii_name);
@@ -125,10 +125,10 @@ pub fn print_launch_resume(opt: &Opt, user: &str) {
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
if !opt.no_analytics {
if opt.analytics() {
eprintln!(
"
Thank you for using Meilisearch!
Thank you for using MeiliSearch!
We collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
@@ -146,7 +146,7 @@ Anonymous telemetry:\t\"Enabled\""
eprintln!();
if opt.master_key.is_some() {
eprintln!("A Master Key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key.");
} else {
eprintln!("No master key found; The server will accept unidentified requests. \
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");

View File

@@ -4,168 +4,144 @@ use std::path::PathBuf;
use std::sync::Arc;
use byte_unit::Byte;
use clap::Parser;
use meilisearch_lib::options::{IndexerOpts, SchedulerConfig};
use meilisearch_lib::options::IndexerOpts;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
ServerSessionMemoryCache,
},
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::Serialize;
use structopt::StructOpt;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, Parser, Serialize)]
#[derive(Debug, Clone, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
pub db_path: PathBuf,
/// The address on which the http server will listen.
#[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[serde(skip)]
#[clap(long, env = "MEILI_MASTER_KEY")]
#[structopt(long, env = "MEILI_MASTER_KEY")]
pub master_key: Option<String>,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode, more logs will be displayed,
/// and the master key can be omitted, which implies that there is no security on the update routes.
/// This is useful to debug when integrating the engine with another service.
#[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
pub env: String,
/// Do not send analytics to Meili.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(skip)] // we can't send true
#[clap(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: bool,
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: Option<Option<bool>>,
/// The maximum size, in bytes, of the main lmdb database directory
#[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
#[structopt(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
pub max_index_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
#[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
#[structopt(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
pub max_task_db_size: Byte,
/// The maximum size, in bytes, of accepted JSON payloads
#[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
pub http_payload_size_limit: Byte,
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be an RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
#[serde(skip)]
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")]
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
pub ssl_require_auth: bool,
/// SSL support session resumption
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_RESUMPTION")]
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
pub ssl_resumption: bool,
/// SSL support tickets.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_TICKETS")]
#[structopt(long, env = "MEILI_SSL_TICKETS")]
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exists or if no snapshot exists at
/// the given path. If this option is not specified, no snapshot is imported.
#[clap(long)]
#[structopt(long)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such a case.
#[clap(long, requires = "import-snapshot")]
#[structopt(long, requires = "import-snapshot")]
pub ignore_missing_snapshot: bool,
/// The engine will skip snapshot importation and not return an error in such a case.
#[clap(long, requires = "import-snapshot")]
#[structopt(long, requires = "import-snapshot")]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where Meilisearch will create a snapshot each snapshot_time_gap.
#[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
pub schedule_snapshot: bool,
/// Defines the time interval, in seconds, between each snapshot creation.
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
pub snapshot_interval_sec: u64,
/// Import a dump from the specified path, must be a `.dump` file.
#[clap(long, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// If the dump doesn't exist, load or create the database specified by `db-path` instead.
#[clap(long, requires = "import-dump")]
pub ignore_missing_dump: bool,
/// Ignore the dump if a database already exists, and load that database instead.
#[clap(long, requires = "import-dump")]
pub ignore_dump_if_db_exists: bool,
/// Folder where dumps are created when the dump route is called.
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
pub dumps_dir: PathBuf,
/// Import a dump from the specified path, must be a `.dump` file.
#[structopt(long, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// Set the log level
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
#[structopt(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
pub log_level: String,
#[serde(skip)]
#[clap(skip)]
#[structopt(skip)]
pub indexer_options: IndexerOpts,
#[serde(flatten)]
#[clap(flatten)]
pub scheduler_options: SchedulerConfig,
}
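The Byte-typed limits above are parsed by byte_unit from human-readable strings; a hedged sketch of the behavior (byte_unit treats MB as decimal and MiB as binary):
use std::str::FromStr;
let limit = byte_unit::Byte::from_str("100 MB").unwrap();
assert_eq!(limit.get_bytes() as u64, 100_000_000); // "100 MiB" would give 104_857_600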
impl Opt {
/// Whether analytics should be enabled or not.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub fn analytics(&self) -> bool {
!self.no_analytics
match self.no_analytics {
None => true,
Some(None) => false,
Some(Some(disabled)) => !disabled,
}
}
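The `Option<Option<bool>>` form maps CLI input to behavior as follows (a hedged summary, assuming structopt's usual handling of an optional flag with an optional value):
// flag absent           -> None              -> analytics enabled
// --no-analytics        -> Some(None)        -> analytics disabled
// --no-analytics=true   -> Some(Some(true))  -> analytics disabled
// --no-analytics=false  -> Some(Some(false)) -> analytics enabled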
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let config = rustls::ServerConfig::builder().with_safe_defaults();
let config = match &self.ssl_auth_path {
let client_auth = match &self.ssl_auth_path {
Some(auth_path) => {
let roots = load_certs(auth_path.to_path_buf())?;
let mut client_auth_roots = RootCertStore::empty();
@@ -173,32 +149,30 @@ impl Opt {
client_auth_roots.add(&root).unwrap();
}
if self.ssl_require_auth {
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
AllowAnyAuthenticatedClient::new(client_auth_roots)
} else {
let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
}
}
None => config.with_no_client_auth(),
None => NoClientAuth::new(),
};
let mut config = rustls::ServerConfig::new(client_auth);
config.key_log = Arc::new(rustls::KeyLogFile::new());
let certs = load_certs(cert_path.to_path_buf())?;
let privkey = load_private_key(key_path.to_path_buf())?;
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
let mut config = config
.with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
config
.set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
config.key_log = Arc::new(rustls::KeyLogFile::new());
if self.ssl_resumption {
config.session_storage = ServerSessionMemoryCache::new(256);
config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
}
if self.ssl_tickets {
config.ticketer = rustls::Ticketer::new().unwrap();
config.ticketer = rustls::Ticketer::new();
}
Ok(Some(config))
@@ -212,9 +186,7 @@ fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
let certfile =
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
let mut reader = BufReader::new(certfile);
certs(&mut reader)
.map(|certs| certs.into_iter().map(rustls::Certificate).collect())
.map_err(|_| anyhow::anyhow!("cannot read certificate file"))
certs(&mut reader).map_err(|_| anyhow::anyhow!("cannot read certificate file"))
}
fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
@@ -239,10 +211,10 @@ fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
// prefer to load pkcs8 keys
if !pkcs8_keys.is_empty() {
Ok(rustls::PrivateKey(pkcs8_keys[0].clone()))
Ok(pkcs8_keys[0].clone())
} else {
assert!(!rsa_keys.is_empty());
Ok(rustls::PrivateKey(rsa_keys[0].clone()))
Ok(rsa_keys[0].clone())
}
}

View File

@@ -1,29 +1,26 @@
use std::str;
use actix_web::{web, HttpRequest, HttpResponse};
use chrono::SecondsFormat;
use meilisearch_auth::{error::AuthControllerError, Action, AuthController, Key};
use meilisearch_auth::{generate_key, Action, AuthController, Key};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use crate::extractors::{
authentication::{policies::*, GuardedData},
sequential_extractor::SeqHandler,
};
use meilisearch_error::{Code, ResponseError};
use crate::extractors::authentication::{policies::*, GuardedData};
use meilisearch_error::ResponseError;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::post().to(SeqHandler(create_api_key)))
.route(web::get().to(SeqHandler(list_api_keys))),
.route(web::post().to(create_api_key))
.route(web::get().to(list_api_keys)),
)
.service(
web::resource("/{api_key}")
.route(web::get().to(SeqHandler(get_api_key)))
.route(web::patch().to(SeqHandler(patch_api_key)))
.route(web::delete().to(SeqHandler(delete_api_key))),
.route(web::get().to(get_api_key))
.route(web::patch().to(patch_api_key))
.route(web::delete().to(delete_api_key)),
);
}
@@ -32,13 +29,8 @@ pub async fn create_api_key(
body: web::Json<Value>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.create_key(v)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let key = auth_controller.create_key(body.into_inner()).await?;
let res = KeyView::from_key(key, auth_controller.get_master_key());
Ok(HttpResponse::Created().json(res))
}
@@ -47,16 +39,11 @@ pub async fn list_api_keys(
auth_controller: GuardedData<MasterPolicy, AuthController>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let keys = auth_controller.list_keys()?;
let res: Vec<_> = keys
.into_iter()
.map(|k| KeyView::from_key(k, &auth_controller))
.collect();
Ok(res)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let keys = auth_controller.list_keys().await?;
let res: Vec<_> = keys
.into_iter()
.map(|k| KeyView::from_key(k, auth_controller.get_master_key()))
.collect();
Ok(HttpResponse::Ok().json(KeyListView::from(res)))
}
@@ -65,13 +52,9 @@ pub async fn get_api_key(
auth_controller: GuardedData<MasterPolicy, AuthController>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let api_key = path.into_inner().api_key;
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.get_key(&api_key)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
// keep the first 8 characters, which are the ID of the API key.
let key = auth_controller.get_key(&path.api_key).await?;
let res = KeyView::from_key(key, auth_controller.get_master_key());
Ok(HttpResponse::Ok().json(res))
}
@@ -81,14 +64,11 @@ pub async fn patch_api_key(
body: web::Json<Value>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let api_key = path.into_inner().api_key;
let body = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.update_key(&api_key, body)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let key = auth_controller
// keep the first 8 characters, which are the ID of the API key.
.update_key(&path.api_key, body.into_inner())
.await?;
let res = KeyView::from_key(key, auth_controller.get_master_key());
Ok(HttpResponse::Ok().json(res))
}
@@ -97,10 +77,8 @@ pub async fn delete_api_key(
auth_controller: GuardedData<MasterPolicy, AuthController>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let api_key = path.into_inner().api_key;
tokio::task::spawn_blocking(move || auth_controller.delete_key(&api_key))
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
// keep the first 8 characters, which are the ID of the API key.
auth_controller.delete_key(&path.api_key).await?;
Ok(HttpResponse::NoContent().finish())
}
@@ -117,27 +95,29 @@ struct KeyView {
key: String,
actions: Vec<Action>,
indexes: Vec<String>,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
expires_at: Option<OffsetDateTime>,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
created_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
updated_at: OffsetDateTime,
expires_at: Option<String>,
created_at: String,
updated_at: String,
}
impl KeyView {
fn from_key(key: Key, auth: &AuthController) -> Self {
fn from_key(key: Key, master_key: Option<&String>) -> Self {
let key_id = str::from_utf8(&key.id).unwrap();
let generated_key = auth.generate_key(key_id).unwrap_or_default();
let generated_key = match master_key {
Some(master_key) => generate_key(master_key.as_bytes(), key_id),
None => generate_key(&[], key_id),
};
KeyView {
description: key.description,
key: generated_key,
actions: key.actions,
indexes: key.indexes,
expires_at: key.expires_at,
created_at: key.created_at,
updated_at: key.updated_at,
expires_at: key
.expires_at
.map(|dt| dt.to_rfc3339_opts(SecondsFormat::Secs, true)),
created_at: key.created_at.to_rfc3339_opts(SecondsFormat::Secs, true),
updated_at: key.updated_at.to_rfc3339_opts(SecondsFormat::Secs, true),
}
}
}
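Serialized, a KeyView matches the shape asserted by the API-key tests later in this diff; a hedged example with illustrative values:
{
  "description": "products key",
  "key": "d0552b41...",  // hypothetical generated key
  "actions": ["documents.add"],
  "indexes": ["products"],
  "expiresAt": "2050-11-13T00:00:00Z",
  "createdAt": "2021-11-13T00:00:00Z",
  "updatedAt": "2021-11-13T00:00:00Z"
}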

View File

@@ -7,13 +7,10 @@ use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))))
.service(
web::resource("/{dump_uid}/status").route(web::get().to(SeqHandler(get_dump_status))),
);
cfg.service(web::resource("").route(web::post().to(create_dump)))
.service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
}
pub async fn create_dump(

View File

@@ -20,7 +20,6 @@ use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::SummarizedTaskView;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
@@ -72,17 +71,17 @@ pub struct DocumentParam {
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
.route(web::get().to(get_all_documents))
.route(web::post().to(add_documents))
.route(web::put().to(update_documents))
.route(web::delete().to(clear_all_documents)),
)
// this route needs to be registered before /documents/{document_id} to match properly
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
.service(web::resource("/delete-batch").route(web::post().to(delete_documents)))
.service(
web::resource("/{document_id}")
.route(web::get().to(SeqHandler(get_document)))
.route(web::delete().to(SeqHandler(delete_document))),
.route(web::get().to(get_document))
.route(web::delete().to(delete_document)),
);
}
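To see why the ordering comment above matters, a hedged sketch of the wrong order (actix-web matches resources in registration order):
// hypothetical wrong order:
// .service(web::resource("/{document_id}")...)   // registered first
// .service(web::resource("/delete-batch")...)    // never reached for matching
// A POST to /indexes/movies/documents/delete-batch would then match
// "/{document_id}" (document_id = "delete-batch") and be rejected, since that
// resource only routes GET and DELETE, instead of reaching delete_documents.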

View File

@@ -1,15 +1,14 @@
use actix_web::{web, HttpRequest, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_error::ResponseError;
use meilisearch_lib::index_controller::Update;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::SummarizedTaskView;
pub mod documents;
@@ -21,17 +20,17 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(list_indexes))
.route(web::post().to(SeqHandler(create_index))),
.route(web::post().to(create_index)),
)
.service(
web::scope("/{index_uid}")
.service(
web::resource("")
.route(web::get().to(SeqHandler(get_index)))
.route(web::put().to(SeqHandler(update_index)))
.route(web::delete().to(SeqHandler(delete_index))),
.route(web::get().to(get_index))
.route(web::put().to(update_index))
.route(web::delete().to(delete_index)),
)
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::resource("/stats").route(web::get().to(get_index_stats)))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/tasks").configure(tasks::configure))
@@ -42,13 +41,14 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn list_indexes(
data: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &data.filters().search_rules;
let indexes: Vec<_> = data
.list_indexes()
.await?
.into_iter()
.filter(|i| search_rules.is_index_authorized(&i.uid))
.collect();
let filters = data.filters();
let mut indexes = data.list_indexes().await?;
if let Some(indexes_filter) = filters.indexes.as_ref() {
indexes = indexes
.into_iter()
.filter(|i| indexes_filter.contains(&i.uid))
.collect();
}
debug!("returns: {:?}", indexes);
Ok(HttpResponse::Ok().json(indexes))
@@ -96,12 +96,9 @@ pub struct UpdateIndexRequest {
pub struct UpdateIndexResponse {
name: String,
uid: String,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
created_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
updated_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
primary_key: OffsetDateTime,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
pub async fn get_index(

View File

@@ -1,6 +1,5 @@
use actix_web::{web, HttpRequest, HttpResponse};
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_error::ResponseError;
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use meilisearch_lib::MeiliSearch;
@@ -9,13 +8,12 @@ use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(search_with_url_query)))
.route(web::post().to(SeqHandler(search_with_post))),
.route(web::get().to(search_with_url_query))
.route(web::post().to(search_with_post)),
);
}
@@ -81,26 +79,6 @@ impl From<SearchQueryGet> for SearchQuery {
}
}
/// Incorporate search rules into the search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
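A worked example of the merge above, with illustrative filter strings:
// query.filter = Some("genre = horror"), rules.filter = Some("user_id = 1")
// both are wrapped into arrays and concatenated:
// query.filter = Some(["genre = horror", "user_id = 1"])
// (the outer array acts as a conjunction in Meilisearch filter syntax, so the
//  tenant-token restriction is always ANDed onto the caller's own filter)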
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the format expected by the POST route.
@@ -135,21 +113,11 @@ pub async fn search_with_url_query(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let mut query: SearchQuery = params.into_inner().into();
let index_uid = path.into_inner();
// Tenant token search_rules.
if let Some(search_rules) = meilisearch
.filters()
.search_rules
.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let query: SearchQuery = params.into_inner().into();
let mut aggregate = SearchAggregator::from_query(&query, &req);
let search_result = meilisearch.search(index_uid, query).await;
let search_result = meilisearch.search(path.into_inner(), query).await;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@@ -172,22 +140,12 @@ pub async fn search_with_post(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let mut query = params.into_inner();
let query = params.into_inner();
debug!("search called with params: {:?}", query);
let index_uid = path.into_inner();
// Tenant token search_rules.
if let Some(search_rules) = meilisearch
.filters()
.search_rules
.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let search_result = meilisearch.search(index_uid, query).await;
let search_result = meilisearch.search(path.into_inner(), query).await;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}

View File

@@ -23,7 +23,6 @@ macro_rules! make_setting_route {
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::SummarizedTaskView;
use meilisearch_error::ResponseError;
@@ -99,9 +98,9 @@ macro_rules! make_setting_route {
pub fn resources() -> Resource {
Resource::new($route)
.route(web::get().to(SeqHandler(get)))
.route(web::post().to(SeqHandler(update)))
.route(web::delete().to(SeqHandler(delete)))
.route(web::get().to(get))
.route(web::post().to(update))
.route(web::delete().to(delete))
}
}
};
@@ -227,12 +226,11 @@ make_setting_route!(
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::post().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
.route(web::post().to(update_all))
.route(web::get().to(get_all))
.route(web::delete().to(delete_all)))
$(.service($mod::resources()))*;
}
};

View File

@@ -1,19 +1,18 @@
use actix_web::{web, HttpRequest, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_error::ResponseError;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::{TaskListView, TaskView};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_all_tasks_status))))
.service(web::resource("{task_id}").route(web::get().to(SeqHandler(get_task_status))));
cfg.service(web::resource("").route(web::get().to(get_all_tasks_status)))
.service(web::resource("{task_id}").route(web::get().to(get_task_status)));
}
#[derive(Debug, Serialize)]
@@ -21,12 +20,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub struct UpdateIndexResponse {
name: String,
uid: String,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
created_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
updated_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
primary_key: OffsetDateTime,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
#[derive(Deserialize)]

View File

@@ -1,7 +1,7 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use meilisearch_error::ResponseError;
use meilisearch_lib::index::{Settings, Unchecked};
@@ -54,10 +54,8 @@ pub struct ProcessedUpdateResult {
#[serde(rename = "type")]
pub update_type: UpdateType,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
pub enqueued_at: DateTime<Utc>,
pub processed_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -68,10 +66,8 @@ pub struct FailedUpdateResult {
pub update_type: UpdateType,
pub error: ResponseError,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
pub enqueued_at: DateTime<Utc>,
pub processed_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -80,13 +76,9 @@ pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(
skip_serializing_if = "Option::is_none",
with = "time::serde::rfc3339::option"
)]
pub started_processing_at: Option<OffsetDateTime>,
pub enqueued_at: DateTime<Utc>,
#[serde(skip_serializing_if = "Option::is_none")]
pub started_processing_at: Option<DateTime<Utc>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -125,18 +117,19 @@ impl IndexUpdateResponse {
/// Always return a 200 with:
/// ```json
/// {
/// "status": "Meilisearch is running"
/// "status": "MeiliSearch is running"
/// }
/// ```
pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" }))
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
}
async fn get_stats(
meilisearch: GuardedData<ActionPolicy<{ actions::STATS_GET }>, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &meilisearch.filters().search_rules;
let response = meilisearch.get_all_stats(search_rules).await?;
let filters = meilisearch.filters();
let response = meilisearch.get_all_stats(&filters.indexes).await?;
debug!("returns: {:?}", response);
Ok(HttpResponse::Ok().json(response))

View File

@@ -7,12 +7,11 @@ use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::{TaskListView, TaskView};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
cfg.service(web::resource("").route(web::get().to(get_tasks)))
.service(web::resource("/{task_id}").route(web::get().to(get_task)));
}
async fn get_tasks(
@@ -26,16 +25,13 @@ async fn get_tasks(
Some(&req),
);
let search_rules = &meilisearch.filters().search_rules;
let filters = if search_rules.is_index_authorized("*") {
None
} else {
let filters = meilisearch.filters().indexes.as_ref().map(|indexes| {
let mut filters = TaskFilter::default();
for (index, _policy) in search_rules.clone() {
filters.filter_index(index);
for index in indexes {
filters.filter_index(index.to_string());
}
Some(filters)
};
filters
});
let tasks: TaskListView = meilisearch
.list_tasks(filters, None, None)
@@ -60,16 +56,13 @@ async fn get_task(
Some(&req),
);
let search_rules = &meilisearch.filters().search_rules;
let filters = if search_rules.is_index_authorized("*") {
None
} else {
let filters = meilisearch.filters().indexes.as_ref().map(|indexes| {
let mut filters = TaskFilter::default();
for (index, _policy) in search_rules.clone() {
filters.filter_index(index);
for index in indexes {
filters.filter_index(index.to_string());
}
Some(filters)
};
filters
});
let task: TaskView = meilisearch
.get_task(task_id.into_inner(), filters)

View File

@@ -1,17 +1,11 @@
use std::fmt::Write;
use std::write;
use chrono::{DateTime, Duration, Utc};
use meilisearch_error::ResponseError;
use meilisearch_lib::index::{Settings, Unchecked};
use meilisearch_lib::milli::update::IndexDocumentsMethod;
use meilisearch_lib::tasks::batch::BatchId;
use meilisearch_lib::tasks::task::{
DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult,
};
use serde::{Serialize, Serializer};
use time::{Duration, OffsetDateTime};
use crate::AUTOBATCHING_ENABLED;
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
@@ -82,52 +76,14 @@ enum TaskDetails {
ClearAll { deleted_documents: Option<u64> },
}
/// Serialize a `time::Duration` as a best-effort ISO 8601 duration while waiting for
/// https://github.com/time-rs/time/issues/378.
/// This code is a port of the old `time` code that was removed in 0.2.
fn serialize_duration<S: Serializer>(
duration: &Option<Duration>,
serializer: S,
) -> Result<S::Ok, S::Error> {
match duration {
Some(duration) => {
// technically speaking, negative duration is not valid ISO 8601
if duration.is_negative() {
return serializer.serialize_none();
}
const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
let secs = duration.whole_seconds();
let days = secs / SECS_PER_DAY;
let secs = secs - days * SECS_PER_DAY;
let hasdate = days != 0;
let nanos = duration.subsec_nanoseconds();
let hastime = (secs != 0 || nanos != 0) || !hasdate;
// all the following unwraps can't fail
let mut res = String::new();
write!(&mut res, "P").unwrap();
if hasdate {
write!(&mut res, "{}D", days).unwrap();
}
const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
if hastime {
if nanos == 0 {
write!(&mut res, "T{}S", secs).unwrap();
} else if nanos % NANOS_PER_MILLI == 0 {
write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
} else if nanos % NANOS_PER_MICRO == 0 {
write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
} else {
write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
}
}
serializer.serialize_str(&res)
let duration_str = duration.to_string();
serializer.serialize_str(&duration_str)
}
None => serializer.serialize_none(),
}
@@ -147,14 +103,9 @@ pub struct TaskView {
error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration")]
duration: Option<Duration>,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
enqueued_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
started_at: Option<OffsetDateTime>,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
finished_at: Option<OffsetDateTime>,
#[serde(skip_serializing_if = "Option::is_none")]
batch_uid: Option<Option<BatchId>>,
enqueued_at: DateTime<Utc>,
started_at: Option<DateTime<Utc>>,
finished_at: Option<DateTime<Utc>>,
}
impl From<Task> for TaskView {
@@ -301,16 +252,6 @@ impl From<Task> for TaskView {
let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
let batch_uid = if AUTOBATCHING_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
let id = events.iter().find_map(|e| match e {
TaskEvent::Batched { batch_id, .. } => Some(*batch_id),
_ => None,
});
Some(id)
} else {
None
};
Self {
uid: id,
index_uid: index_uid.into_inner(),
@@ -322,7 +263,6 @@ impl From<Task> for TaskView {
enqueued_at,
started_at,
finished_at,
batch_uid,
}
}
}
@@ -346,8 +286,7 @@ pub struct SummarizedTaskView {
status: TaskStatus,
#[serde(rename = "type")]
task_type: TaskType,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
enqueued_at: OffsetDateTime,
enqueued_at: DateTime<Utc>,
}
impl From<Task> for SummarizedTaskView {

View File

@@ -1,6 +1,6 @@
use crate::common::Server;
use assert_json_diff::assert_json_include;
use serde_json::{json, Value};
use serde_json::json;
use std::{thread, time};
#[actix_rt::test]
@@ -155,41 +155,6 @@ async fn add_valid_api_key_no_description() {
assert_eq!(code, 201);
}
#[actix_rt::test]
async fn add_valid_api_key_null_description() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let content = json!({
"description": Value::Null,
"indexes": ["products"],
"actions": [
"documents.add"
],
"expiresAt": "2050-11-13T00:00:00"
});
let (response, code) = server.add_api_key(content).await;
assert!(response["key"].is_string());
assert!(response["expiresAt"].is_string());
assert!(response["createdAt"].is_string());
assert!(response["updatedAt"].is_string());
let expected_response = json!({
"actions": [
"documents.add"
],
"indexes": [
"products"
],
"expiresAt": "2050-11-13T00:00:00Z"
});
assert_json_include!(actual: response, expected: expected_response);
assert_eq!(code, 201);
}
#[actix_rt::test]
async fn error_add_api_key_no_header() {
let server = Server::new_auth().await;
@@ -257,7 +222,7 @@ async fn error_add_api_key_missing_parameter() {
"message": "`indexes` field is mandatory.",
"code": "missing_parameter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_parameter"
"link":"https://docs.meilisearch.com/errors#missing_parameter"
});
assert_eq!(response, expected_response);
@@ -275,7 +240,7 @@ async fn error_add_api_key_missing_parameter() {
"message": "`actions` field is mandatory.",
"code": "missing_parameter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_parameter"
"link":"https://docs.meilisearch.com/errors#missing_parameter"
});
assert_eq!(response, expected_response);
@@ -293,7 +258,7 @@ async fn error_add_api_key_missing_parameter() {
"message": "`expiresAt` field is mandatory.",
"code": "missing_parameter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_parameter"
"link":"https://docs.meilisearch.com/errors#missing_parameter"
});
assert_eq!(response, expected_response);
@@ -316,7 +281,7 @@ async fn error_add_api_key_invalid_parameters_description() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`description` field value `{"name":"products"}` is invalid. It should be a string or specified as a null value."#,
"message": r#"description field value `{"name":"products"}` is invalid. It should be a string or specified as a null value."#,
"code": "invalid_api_key_description",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_description"
@@ -342,7 +307,7 @@ async fn error_add_api_key_invalid_parameters_indexes() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`indexes` field value `{"name":"products"}` is invalid. It should be an array of string representing index names."#,
"message": r#"indexes field value `{"name":"products"}` is invalid. It should be an array of string representing index names."#,
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"
@@ -366,7 +331,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`actions` field value `{"name":"products"}` is invalid. It should be an array of string representing action names."#,
"message": r#"actions field value `{"name":"products"}` is invalid. It should be an array of string representing action names."#,
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -386,7 +351,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`actions` field value `["doc.add"]` is invalid. It should be an array of string representing action names."#,
"message": r#"actions field value `["doc.add"]` is invalid. It should be an array of string representing action names."#,
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -412,7 +377,7 @@ async fn error_add_api_key_invalid_parameters_expires_at() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`expiresAt` field value `{"name":"products"}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'."#,
"message": r#"expiresAt field value `{"name":"products"}` is invalid. It should be in ISO-8601 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS'."#,
"code": "invalid_api_key_expires_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_expires_at"
@@ -438,7 +403,7 @@ async fn error_add_api_key_invalid_parameters_expires_at_in_the_past() {
let (response, code) = server.add_api_key(content).await;
let expected_response = json!({
"message": r#"`expiresAt` field value `"2010-11-13T00:00:00Z"` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'."#,
"message": r#"expiresAt field value `"2010-11-13T00:00:00Z"` is invalid. It should be in ISO-8601 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS'."#,
"code": "invalid_api_key_expires_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_expires_at"
@@ -1213,7 +1178,7 @@ async fn error_patch_api_key_indexes_invalid_parameters() {
let (response, code) = server.patch_api_key(&key, content).await;
let expected_response = json!({
"message": "`description` field value `13` is invalid. It should be a string or specified as a null value.",
"message": "description field value `13` is invalid. It should be a string or specified as a null value.",
"code": "invalid_api_key_description",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_description"
@@ -1230,7 +1195,7 @@ async fn error_patch_api_key_indexes_invalid_parameters() {
let (response, code) = server.patch_api_key(&key, content).await;
let expected_response = json!({
"message": "`indexes` field value `13` is invalid. It should be an array of string representing index names.",
"message": "indexes field value `13` is invalid. It should be an array of string representing index names.",
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"
@@ -1246,7 +1211,7 @@ async fn error_patch_api_key_indexes_invalid_parameters() {
let (response, code) = server.patch_api_key(&key, content).await;
let expected_response = json!({
"message": "`actions` field value `13` is invalid. It should be an array of string representing action names.",
"message": "actions field value `13` is invalid. It should be an array of string representing action names.",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -1262,7 +1227,7 @@ async fn error_patch_api_key_indexes_invalid_parameters() {
let (response, code) = server.patch_api_key(&key, content).await;
let expected_response = json!({
"message": "`expiresAt` field value `13` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.",
"message": "expiresAt field value `13` is invalid. It should be in ISO-8601 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS'.",
"code": "invalid_api_key_expires_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_expires_at"

View File

@@ -1,62 +1,56 @@
use crate::common::Server;
use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use chrono::{Duration, Utc};
use maplit::hashmap;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use time::{Duration, OffsetDateTime};
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), &'static str>> =
Lazy::new(|| {
hashmap! {
("POST", "/indexes/products/search") => hashset!{"search", "*"},
("GET", "/indexes/products/search") => hashset!{"search", "*"},
("POST", "/indexes/products/documents") => hashset!{"documents.add", "*"},
("GET", "/indexes/products/documents") => hashset!{"documents.get", "*"},
("GET", "/indexes/products/documents/0") => hashset!{"documents.get", "*"},
("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "*"},
("GET", "/tasks") => hashset!{"tasks.get", "*"},
("GET", "/indexes/products/tasks") => hashset!{"tasks.get", "*"},
("GET", "/indexes/products/tasks/0") => hashset!{"tasks.get", "*"},
("PUT", "/indexes/products/") => hashset!{"indexes.update", "*"},
("GET", "/indexes/products/") => hashset!{"indexes.get", "*"},
("DELETE", "/indexes/products/") => hashset!{"indexes.delete", "*"},
("POST", "/indexes") => hashset!{"indexes.create", "*"},
("GET", "/indexes") => hashset!{"indexes.get", "*"},
("GET", "/indexes/products/settings") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/displayed-attributes") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/distinct-attribute") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/filterable-attributes") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/ranking-rules") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/searchable-attributes") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/sortable-attributes") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/stop-words") => hashset!{"settings.get", "*"},
("GET", "/indexes/products/settings/synonyms") => hashset!{"settings.get", "*"},
("DELETE", "/indexes/products/settings") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/displayed-attributes") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/distinct-attribute") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/filterable-attributes") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/ranking-rules") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/searchable-attributes") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/sortable-attributes") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/stop-words") => hashset!{"settings.update", "*"},
("POST", "/indexes/products/settings/synonyms") => hashset!{"settings.update", "*"},
("GET", "/indexes/products/stats") => hashset!{"stats.get", "*"},
("GET", "/stats") => hashset!{"stats.get", "*"},
("POST", "/dumps") => hashset!{"dumps.create", "*"},
("GET", "/dumps/0/status") => hashset!{"dumps.get", "*"},
("GET", "/version") => hashset!{"version", "*"},
("POST", "/indexes/products/search") => "search",
("GET", "/indexes/products/search") => "search",
("POST", "/indexes/products/documents") => "documents.add",
("GET", "/indexes/products/documents") => "documents.get",
("GET", "/indexes/products/documents/0") => "documents.get",
("DELETE", "/indexes/products/documents/0") => "documents.delete",
("GET", "/tasks") => "tasks.get",
("GET", "/indexes/products/tasks") => "tasks.get",
("GET", "/indexes/products/tasks/0") => "tasks.get",
("PUT", "/indexes/products/") => "indexes.update",
("GET", "/indexes/products/") => "indexes.get",
("DELETE", "/indexes/products/") => "indexes.delete",
("POST", "/indexes") => "indexes.create",
("GET", "/indexes") => "indexes.get",
("GET", "/indexes/products/settings") => "settings.get",
("GET", "/indexes/products/settings/displayed-attributes") => "settings.get",
("GET", "/indexes/products/settings/distinct-attribute") => "settings.get",
("GET", "/indexes/products/settings/filterable-attributes") => "settings.get",
("GET", "/indexes/products/settings/ranking-rules") => "settings.get",
("GET", "/indexes/products/settings/searchable-attributes") => "settings.get",
("GET", "/indexes/products/settings/sortable-attributes") => "settings.get",
("GET", "/indexes/products/settings/stop-words") => "settings.get",
("GET", "/indexes/products/settings/synonyms") => "settings.get",
("DELETE", "/indexes/products/settings") => "settings.update",
("POST", "/indexes/products/settings") => "settings.update",
("POST", "/indexes/products/settings/displayed-attributes") => "settings.update",
("POST", "/indexes/products/settings/distinct-attribute") => "settings.update",
("POST", "/indexes/products/settings/filterable-attributes") => "settings.update",
("POST", "/indexes/products/settings/ranking-rules") => "settings.update",
("POST", "/indexes/products/settings/searchable-attributes") => "settings.update",
("POST", "/indexes/products/settings/sortable-attributes") => "settings.update",
("POST", "/indexes/products/settings/stop-words") => "settings.update",
("POST", "/indexes/products/settings/synonyms") => "settings.update",
("GET", "/indexes/products/stats") => "stats.get",
("GET", "/stats") => "stats.get",
("POST", "/dumps") => "dumps.create",
("GET", "/dumps/0/status") => "dumps.get",
("GET", "/version") => "version",
}
});
pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
AUTHORIZATIONS
.values()
.cloned()
.reduce(|l, r| l.union(&r).cloned().collect())
.unwrap()
});
static ALL_ACTIONS: Lazy<HashSet<&'static str>> =
Lazy::new(|| AUTHORIZATIONS.values().cloned().collect());
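The two shapes of `ALL_ACTIONS` above follow from the changed value type of the map: one side stores a set of actions per route and has to union those sets, the other stores a single action per route and can collect the values directly. A standalone sketch of the difference (illustrative data, std collections only):

use std::collections::HashSet;

fn main() {
    // Per-route action sets: aggregate by unioning them.
    let sets: Vec<HashSet<&str>> = vec![
        ["search", "*"].into_iter().collect(),
        ["documents.add", "*"].into_iter().collect(),
    ];
    let all: HashSet<&str> = sets
        .into_iter()
        .reduce(|l, r| l.union(&r).cloned().collect())
        .unwrap();
    assert_eq!(all.len(), 3); // {"search", "documents.add", "*"}

    // One action per route: the values collect straight into a set.
    let single = ["search", "documents.add"];
    let all_single: HashSet<&str> = single.into_iter().collect();
    assert_eq!(all_single.len(), 2);
}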
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid.",
@@ -67,7 +61,6 @@ static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
});
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_expired_key() {
use std::{thread, time};
@@ -77,7 +70,7 @@ async fn error_access_expired_key() {
let content = json!({
"indexes": ["products"],
"actions": ALL_ACTIONS.clone(),
"expiresAt": (OffsetDateTime::now_utc() + Duration::seconds(1)).format(&Rfc3339).unwrap(),
"expiresAt": (Utc::now() + Duration::seconds(1)),
});
let (response, code) = server.add_api_key(content).await;
@@ -99,7 +92,6 @@ async fn error_access_expired_key() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_unauthorized_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -107,7 +99,7 @@ async fn error_access_unauthorized_index() {
let content = json!({
"indexes": ["sales"],
"actions": ALL_ACTIONS.clone(),
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
@@ -130,7 +122,6 @@ async fn error_access_unauthorized_index() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_unauthorized_action() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -138,7 +129,7 @@ async fn error_access_unauthorized_action() {
let content = json!({
"indexes": ["products"],
"actions": [],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
@@ -153,7 +144,7 @@ async fn error_access_unauthorized_action() {
// Patch the API key, granting all rights except the one needed.
let content = json!({
"actions": ALL_ACTIONS.difference(action).collect::<Vec<_>>(),
"actions": ALL_ACTIONS.iter().cloned().filter(|a| a != action).collect::<Vec<_>>(),
});
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
@@ -167,7 +158,6 @@ async fn error_access_unauthorized_action() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn access_authorized_restricted_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -175,7 +165,7 @@ async fn access_authorized_restricted_index() {
let content = json!({
"indexes": ["products"],
"actions": [],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
@@ -185,28 +175,40 @@ async fn access_authorized_restricted_index() {
let key = response["key"].as_str().unwrap();
server.use_api_key(&key);
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// Patch the API key, granting only the needed action.
let content = json!({
"actions": [action],
});
for ((method, route), action) in AUTHORIZATIONS.iter() {
// Patch the API key, granting only the needed action.
let content = json!({
"actions": [action],
});
server.use_api_key("MASTER_KEY");
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
server.use_api_key("MASTER_KEY");
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
}
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// Patch the API key with the wildcard `*` action.
let content = json!({
"actions": ["*"],
});
server.use_api_key("MASTER_KEY");
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
}
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn access_authorized_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -214,7 +216,7 @@ async fn access_authorized_no_index_restriction() {
let content = json!({
"indexes": ["*"],
"actions": [],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
@@ -224,28 +226,40 @@ async fn access_authorized_no_index_restriction() {
let key = response["key"].as_str().unwrap();
server.use_api_key(&key);
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
server.use_api_key("MASTER_KEY");
for ((method, route), action) in AUTHORIZATIONS.iter() {
server.use_api_key("MASTER_KEY");
// Patch the API key, granting only the needed action.
let content = json!({
"actions": [action],
});
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
// Patch the API key, granting only the needed action.
let content = json!({
"actions": [action],
});
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
}
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// Patch the API key with the wildcard `*` action.
let content = json!({
"actions": ["*"],
});
server.use_api_key("MASTER_KEY");
let (_, code) = server.patch_api_key(&key, content).await;
assert_eq!(code, 200);
server.use_api_key(&key);
let (response, code) = server.dummy_request(method, route).await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
}
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn access_authorized_stats_restricted_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -264,7 +278,7 @@ async fn access_authorized_stats_restricted_index() {
let content = json!({
"indexes": ["products"],
"actions": ["stats.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -285,7 +299,6 @@ async fn access_authorized_stats_restricted_index() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn access_authorized_stats_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -304,7 +317,7 @@ async fn access_authorized_stats_no_index_restriction() {
let content = json!({
"indexes": ["*"],
"actions": ["stats.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -325,7 +338,6 @@ async fn access_authorized_stats_no_index_restriction() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn list_authorized_indexes_restricted_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -344,7 +356,7 @@ async fn list_authorized_indexes_restricted_index() {
let content = json!({
"indexes": ["products"],
"actions": ["indexes.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -366,7 +378,6 @@ async fn list_authorized_indexes_restricted_index() {
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn list_authorized_indexes_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
@@ -385,7 +396,7 @@ async fn list_authorized_indexes_no_index_restriction() {
let content = json!({
"indexes": ["*"],
"actions": ["indexes.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -425,7 +436,7 @@ async fn list_authorized_tasks_restricted_index() {
let content = json!({
"indexes": ["products"],
"actions": ["tasks.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -465,7 +476,7 @@ async fn list_authorized_tasks_no_index_restriction() {
let content = json!({
"indexes": ["*"],
"actions": ["tasks.get"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
"expiresAt": Utc::now() + Duration::hours(1),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
@@ -494,8 +505,7 @@ async fn error_creating_index_without_action() {
// create a key with access to all indexes.
let content = json!({
"indexes": ["*"],
// Grant every action except the ones that allow creating an index.
"actions": ALL_ACTIONS.iter().cloned().filter(|a| !AUTHORIZATIONS.get(&("POST","/indexes")).unwrap().contains(a)).collect::<Vec<_>>(),
"actions": ALL_ACTIONS.iter().cloned().filter(|a| *a != "indexes.create").collect::<Vec<_>>(),
"expiresAt": "2050-11-13T00:00:00Z"
});
let (response, code) = server.add_api_key(content).await;

View File

@@ -1,7 +1,6 @@
mod api_keys;
mod authorization;
mod payload;
mod tenant_token;
use crate::common::Server;
use actix_web::http::StatusCode;

View File

@@ -1,575 +0,0 @@
use crate::common::Server;
use ::time::format_description::well_known::Rfc3339;
use maplit::hashmap;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use std::collections::HashMap;
use time::{Duration, OffsetDateTime};
use super::authorization::{ALL_ACTIONS, AUTHORIZATIONS};
fn generate_tenant_token(parent_key: impl AsRef<str>, mut body: HashMap<&str, Value>) -> String {
use jsonwebtoken::{encode, EncodingKey, Header};
let key_id = &parent_key.as_ref()[..8];
body.insert("apiKeyPrefix", json!(key_id));
encode(
&Header::default(),
&body,
&EncodingKey::from_secret(parent_key.as_ref().as_bytes()),
)
.unwrap()
}
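A usage sketch for the helper above (which this diff removes along with the rest of the file); it assumes `generate_tenant_token` in scope plus the `maplit` and `serde_json` crates, and the key and claims are hypothetical:

use maplit::hashmap;
use serde_json::json;

fn demo_token() -> String {
    // Hypothetical parent key; the helper embeds its first 8 characters
    // as the "apiKeyPrefix" claim.
    let parent_key = "ak_1234567890abcdef";
    let claims = hashmap! {
        "searchRules" => json!(["sales"]),
        "exp" => json!(1_900_000_000i64), // hypothetical Unix timestamp
    };
    // Returns a JWT signed with the parent key as the HMAC secret.
    generate_tenant_token(parent_key, claims)
}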
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "287947",
"color": ["green", "blue"]
},
{
"title": "Captain Marvel",
"id": "299537",
"color": ["yellow", "blue"]
},
{
"title": "Escape Room",
"id": "522681",
"color": ["yellow", "red"]
},
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": ["green", "red"]
},
{
"title": "Glass",
"id": "450465",
"color": ["blue", "red"]
}
])
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
})
});
static ACCEPTED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
vec![
json!({
"indexes": ["*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["*"],
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["sales"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["sales"],
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
]
});
static REFUSED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
vec![
// no search action
json!({
"indexes": ["*"],
"actions": ALL_ACTIONS.iter().cloned().filter(|a| *a != "search" && *a != "*").collect::<Vec<_>>(),
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["sales"],
"actions": ALL_ACTIONS.iter().cloned().filter(|a| *a != "search" && *a != "*").collect::<Vec<_>>(),
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
// bad index
json!({
"indexes": ["products"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["products"],
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
]
});
macro_rules! compute_autorized_search {
($tenant_tokens:expr, $filter:expr, $expected_count:expr) => {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let index = server.index("sales");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(1).await;
drop(index);
for key_content in ACCEPTED_KEYS.iter() {
server.use_api_key("MASTER_KEY");
let (response, code) = server.add_api_key(key_content.clone()).await;
assert_eq!(code, 201);
let key = response["key"].as_str().unwrap();
for tenant_token in $tenant_tokens.iter() {
let web_token = generate_tenant_token(&key, tenant_token.clone());
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({ "filter": $filter }), |response, code| {
assert_eq!(
code, 200,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
assert_eq!(
response["hits"].as_array().unwrap().len(),
$expected_count,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
})
.await;
}
}
};
}
macro_rules! compute_forbidden_search {
($tenant_tokens:expr, $parent_keys:expr) => {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let index = server.index("sales");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
drop(index);
for key_content in $parent_keys.iter() {
server.use_api_key("MASTER_KEY");
let (response, code) = server.add_api_key(key_content.clone()).await;
assert_eq!(code, 201, "{:?}", response);
let key = response["key"].as_str().unwrap();
for tenant_token in $tenant_tokens.iter() {
let web_token = generate_tenant_token(&key, tenant_token.clone());
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({}), |response, code| {
assert_eq!(
response,
INVALID_RESPONSE.clone(),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
key_content
);
assert_eq!(
code, 403,
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response, tenant_token, key_content
);
})
.await;
}
}
};
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn search_authorized_simple_token() {
let tenant_tokens = vec![
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"*": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"sales": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => Value::Null
},
];
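// With no filter in the search rules, all five fixture documents are returned.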
compute_autorized_search!(tenant_tokens, {}, 5);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn search_authorized_filter_token() {
let tenant_tokens = vec![
hashmap! {
"searchRules" => json!({"*": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": {"filter": ["color = blue"]}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {"filter": ["color = blue"]}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
// filter on sales should override filters on *
hashmap! {
"searchRules" => json!({
"*": {"filter": "color = green"},
"sales": {"filter": "color = blue"}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sales": {"filter": "color = blue"}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {"filter": "color = green"},
"sales": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sales": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
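// Three of the five fixture documents carry "blue", so the enforced filter leaves three hits.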
compute_autorized_search!(tenant_tokens, {}, 3);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn filter_search_authorized_filter_token() {
let tenant_tokens = vec![
hashmap! {
"searchRules" => json!({"*": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {"filter": "color = blue"}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": {"filter": ["color = blue"]}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {"filter": ["color = blue"]}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
// filter on sales should override filters on *
hashmap! {
"searchRules" => json!({
"*": {"filter": "color = green"},
"sales": {"filter": "color = blue"}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sales": {"filter": "color = blue"}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {"filter": "color = green"},
"sales": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sales": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
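// Only "Captain Marvel" carries both "blue" (token filter) and "yellow" (request filter).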
compute_autorized_search!(tenant_tokens, "color = yellow", 1);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
compute_forbidden_search!(tenant_tokens, REFUSED_KEYS);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_search_forbidden_token() {
let tenant_tokens = vec![
// bad index
hashmap! {
"searchRules" => json!({"products": {}}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["products"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"products": {}}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!({"products": Value::Null}),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["products"]),
"exp" => Value::Null
},
// expired token
hashmap! {
"searchRules" => json!({"*": {}}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"*": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": {}}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({"sales": Value::Null}),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["sales"]),
"exp" => json!((OffsetDateTime::now_utc() - Duration::hours(1)).unix_timestamp())
},
];
compute_forbidden_search!(tenant_tokens, ACCEPTED_KEYS);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_forbidden_routes() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
assert!(response["key"].is_string());
let key = response["key"].as_str().unwrap();
let tenant_token = hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
};
let web_token = generate_tenant_token(&key, tenant_token);
server.use_api_key(&web_token);
for ((method, route), actions) in AUTHORIZATIONS.iter() {
if !actions.contains("search") {
let (response, code) = server.dummy_request(method, route).await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
}
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_expired_parent_key() {
use std::{thread, time};
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::seconds(1)).format(&Rfc3339).unwrap(),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
assert!(response["key"].is_string());
let key = response["key"].as_str().unwrap();
let tenant_token = hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
};
let web_token = generate_tenant_token(&key, tenant_token);
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (response, code) = server
.dummy_request("POST", "/indexes/products/search")
.await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (response, code) = server
.dummy_request("POST", "/indexes/products/search")
.await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn error_access_modified_token() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
let (response, code) = server.add_api_key(content).await;
assert_eq!(code, 201);
assert!(response["key"].is_string());
let key = response["key"].as_str().unwrap();
let tenant_token = hashmap! {
"searchRules" => json!(["products"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
};
let web_token = generate_tenant_token(&key, tenant_token);
server.use_api_key(&web_token);
// test search request while web_token is valid
let (response, code) = server
.dummy_request("POST", "/indexes/products/search")
.await;
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
let tenant_token = hashmap! {
"searchRules" => json!(["*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
};
let alt = generate_tenant_token(&key, tenant_token);
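// Recombine the original header and signature with the other token's payload:
// the signature no longer matches, so the server must reject the result.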
let altered_token = [
web_token.split('.').next().unwrap(),
alt.split('.').nth(1).unwrap(),
web_token.split('.').nth(2).unwrap(),
]
.join(".");
server.use_api_key(&altered_token);
let (response, code) = server
.dummy_request("POST", "/indexes/products/search")
.await;
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -130,7 +130,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
master_key: None,
env: "development".to_owned(),
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: true,
no_analytics: Some(Some(true)),
max_index_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
max_task_db_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
@@ -148,14 +148,11 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
schedule_snapshot: false,
snapshot_interval_sec: 0,
import_dump: None,
ignore_missing_dump: false,
ignore_dump_if_db_exists: false,
indexer_options: IndexerOpts {
// memory has to be unlimited because several Meilisearch instances run in the test context.
max_memory: MaxMemory::unlimited(),
..Default::default()
},
log_level: "off".into(),
scheduler_options: meilisearch_lib::options::SchedulerConfig::default(),
}
}

View File

@@ -1,8 +1,8 @@
use crate::common::{GetAllDocumentsOptions, Server};
use actix_web::test;
use chrono::DateTime;
use meilisearch_http::{analytics, create_app};
use serde_json::{json, Value};
use time::{format_description::well_known::Rfc3339, OffsetDateTime};
/// This is the basic usage of our API and every other tests uses the content-type application/json
#[actix_rt::test]
@@ -568,9 +568,9 @@ async fn add_documents_no_index_creation() {
assert_eq!(response["details"]["indexedDocuments"], 1);
let processed_at =
OffsetDateTime::parse(response["finishedAt"].as_str().unwrap(), &Rfc3339).unwrap();
DateTime::parse_from_rfc3339(response["finishedAt"].as_str().unwrap()).unwrap();
let enqueued_at =
OffsetDateTime::parse(response["enqueuedAt"].as_str().unwrap(), &Rfc3339).unwrap();
DateTime::parse_from_rfc3339(response["enqueuedAt"].as_str().unwrap()).unwrap();
assert!(processed_at > enqueued_at);
// index was created, and the primary key was inferred.
@@ -710,11 +710,20 @@ async fn replace_document() {
}
#[actix_rt::test]
async fn add_no_documents() {
async fn error_add_no_documents() {
let server = Server::new().await;
let index = server.index("test");
let (_response, code) = index.add_documents(json!([]), None).await;
assert_eq!(code, 202);
let (response, code) = index.add_documents(json!([]), None).await;
let expected_response = json!({
"message": "The `json` payload must contain at least one document.",
"code": "malformed_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#malformed_payload"
});
assert_eq!(response, expected_response);
assert_eq!(code, 400);
}
#[actix_rt::test]

View File

@@ -43,8 +43,8 @@ async fn error_delete_unexisting_index() {
assert_eq!(response["error"], expected_response);
}
#[cfg(not(windows))]
#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn loop_delete_add_documents() {
let server = Server::new().await;
let index = server.index("test");

View File

@@ -1,6 +1,6 @@
use crate::common::Server;
use chrono::DateTime;
use serde_json::json;
use time::{format_description::well_known::Rfc3339, OffsetDateTime};
#[actix_rt::test]
async fn update_primary_key() {
@@ -25,10 +25,8 @@ async fn update_primary_key() {
assert!(response.get("createdAt").is_some());
assert!(response.get("updatedAt").is_some());
let created_at =
OffsetDateTime::parse(response["createdAt"].as_str().unwrap(), &Rfc3339).unwrap();
let updated_at =
OffsetDateTime::parse(response["updatedAt"].as_str().unwrap(), &Rfc3339).unwrap();
let created_at = DateTime::parse_from_rfc3339(response["createdAt"].as_str().unwrap()).unwrap();
let updated_at = DateTime::parse_from_rfc3339(response["updatedAt"].as_str().unwrap()).unwrap();
assert!(created_at < updated_at);
assert_eq!(response["primaryKey"], "primary");

View File

@@ -60,7 +60,10 @@ async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap();
let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot");
let snapshot_path = snapshot_dir
.path()
.to_owned()
.join("db.snapshot".to_string());
let options = Opt {
import_snapshot: Some(snapshot_path),

View File

@@ -1,5 +1,4 @@
use serde_json::json;
use time::{format_description::well_known::Rfc3339, OffsetDateTime};
use crate::common::Server;
@@ -58,15 +57,11 @@ async fn stats() {
index.wait_task(1).await;
let timestamp = OffsetDateTime::now_utc();
let (response, code) = server.stats().await;
assert_eq!(code, 200);
assert!(response["databaseSize"].as_u64().unwrap() > 0);
let last_update =
OffsetDateTime::parse(response["lastUpdate"].as_str().unwrap(), &Rfc3339).unwrap();
assert!(last_update - timestamp < time::Duration::SECOND);
assert!(response.get("lastUpdate").is_some());
assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 2);
assert!(response["indexes"]["test"]["isIndexing"] == false);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["id"], 2);

View File

@@ -1,7 +1,6 @@
use crate::common::Server;
use chrono::{DateTime, Utc};
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
#[actix_rt::test]
async fn error_get_task_unexisting_index() {
@@ -99,8 +98,7 @@ macro_rules! assert_valid_summarized_task {
assert_eq!($response["status"], "enqueued");
assert_eq!($response["type"], $task_type);
let date = $response["enqueuedAt"].as_str().expect("missing date");
OffsetDateTime::parse(date, &Rfc3339).unwrap();
date.parse::<DateTime<Utc>>().unwrap();
}};
}

View File

@@ -1,17 +1,20 @@
[package]
name = "meilisearch-lib"
version = "0.26.0"
edition = "2021"
version = "0.25.2"
edition = "2018"
resolver = "2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-web = { version = "4", default-features = false }
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
anyhow = { version = "1.0.43", features = ["backtrace"] }
async-stream = "0.3.2"
async-trait = "0.1.51"
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
bytes = "1.1.0"
chrono = { version = "0.4.19", features = ["serde"] }
csv = "1.1.6"
crossbeam-channel = "0.5.1"
either = "1.6.1"
@@ -27,7 +30,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-auth = { path = "../meilisearch-auth" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.22.2" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.21.1" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.8.0"
@@ -40,11 +43,10 @@ serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
siphasher = "0.3.7"
slice-group-by = "0.2.6"
clap = { version = "3.0", features = ["derive", "env"] }
structopt = "0.3.23"
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.11.0", features = ["full"] }
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
@@ -55,13 +57,12 @@ reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-featu
sysinfo = "0.20.2"
derivative = "2.2.0"
fs_extra = "1.2.0"
atomic_refcell = "0.1.8"
[dev-dependencies]
actix-rt = "2.2.0"
mockall = "0.10.2"
paste = "1.0.5"
nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "e5f4ff046c21e7e986c7cb31550d1c9e7f0b693b"}
meilisearch-error = { path = "../meilisearch-error", features = ["test-traits"] }
proptest = "1.0.0"
proptest-derive = "0.3.0"

View File

@@ -17,3 +17,4 @@ cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to
cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0
cc 2e8644e6397b5f76e0b79f961fa125e2f45f42f26e03c453c9a174dfb427500d # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0

View File

@@ -32,6 +32,8 @@ pub enum DocumentFormatError {
Box<dyn std::error::Error + Send + Sync + 'static>,
PayloadType,
),
#[error("The `{0}` payload must contain at least one document.")]
EmptyPayload(PayloadType),
}
impl From<(PayloadType, milli::documents::Error)> for DocumentFormatError {
@@ -48,6 +50,7 @@ impl ErrorCode for DocumentFormatError {
match self {
DocumentFormatError::Internal(_) => Code::Internal,
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
DocumentFormatError::EmptyPayload(_) => Code::MalformedPayload,
}
}
}
@@ -60,6 +63,10 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
let builder =
DocumentBatchBuilder::from_csv(input, writer).map_err(|e| (PayloadType::Csv, e))?;
if builder.len() == 0 {
return Err(DocumentFormatError::EmptyPayload(PayloadType::Csv));
}
let count = builder.finish().map_err(|e| (PayloadType::Csv, e))?;
Ok(count)
@@ -74,17 +81,16 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize>
let mut buf = String::new();
while reader.read_line(&mut buf)? > 0 {
// skip empty lines
if buf == "\n" {
buf.clear();
continue;
}
builder
.extend_from_json(Cursor::new(&buf.as_bytes()))
.map_err(|e| (PayloadType::Ndjson, e))?;
buf.clear();
}
if builder.len() == 0 {
return Err(DocumentFormatError::EmptyPayload(PayloadType::Ndjson));
}
let count = builder.finish().map_err(|e| (PayloadType::Ndjson, e))?;
Ok(count)
@@ -98,6 +104,10 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
.extend_from_json(input)
.map_err(|e| (PayloadType::Json, e))?;
if builder.len() == 0 {
return Err(DocumentFormatError::EmptyPayload(PayloadType::Json));
}
let count = builder.finish().map_err(|e| (PayloadType::Json, e))?;
Ok(count)
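A trimmed, self-contained sketch of the empty-payload guard this hunk adds to each reader (the types below are minimal stand-ins for the real `PayloadType` and `DocumentFormatError`, not the actual definitions):

// Stand-ins for the real types; only what the guard needs.
#[derive(Debug)]
enum PayloadType {
    Csv,
    Json,
    Ndjson,
}

#[derive(Debug)]
enum DocumentFormatError {
    EmptyPayload(PayloadType),
}

// The pattern added to read_csv / read_json / read_ndjson: refuse to finish
// a batch that contains zero documents, so no misleading primary-key error
// is raised further down the pipeline.
fn check_not_empty(doc_count: usize, payload: PayloadType) -> Result<(), DocumentFormatError> {
    if doc_count == 0 {
        return Err(DocumentFormatError::EmptyPayload(payload));
    }
    Ok(())
}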

View File

@@ -6,10 +6,10 @@ use anyhow::Context;
use heed::{EnvOpenOptions, RoTxn};
use indexmap::IndexMap;
use milli::documents::DocumentBatchReader;
use milli::update::{IndexDocumentsConfig, IndexerConfig};
use serde::{Deserialize, Serialize};
use crate::document_formats::read_ndjson;
use crate::document_formats::{read_ndjson, DocumentFormatError};
use crate::index::update_handler::UpdateHandler;
use crate::index::updates::apply_settings_to_builder;
use super::error::Result;
@@ -85,7 +85,7 @@ impl Index {
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
size: usize,
indexer_config: &IndexerConfig,
update_handler: &UpdateHandler,
) -> anyhow::Result<()> {
let dir_name = src
.as_ref()
@@ -110,7 +110,8 @@ impl Index {
let mut txn = index.write_txn()?;
// Apply settings first
let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config);
let builder = update_handler.update_builder();
let mut builder = builder.settings(&mut txn, &index);
if let Some(primary_key) = primary_key {
builder.set_primary_key(primary_key);
@@ -127,8 +128,8 @@ impl Index {
let empty = match read_ndjson(reader, &mut tmp_doc_file) {
// if there was no document in the file it's because the index was empty
Ok(0) => true,
Ok(_) => false,
Err(DocumentFormatError::EmptyPayload(_)) => true,
Err(e) => return Err(e.into()),
};
@@ -139,16 +140,12 @@ impl Index {
// If the document file is empty, we don't perform the document addition,
// to prevent a primary key error from being thrown.
let config = IndexDocumentsConfig::default();
let mut builder = milli::update::IndexDocuments::new(
&mut txn,
&index,
indexer_config,
config,
|_| (),
);
builder.add_documents(documents_reader)?;
builder.execute()?;
if !documents_reader.is_empty() {
let builder = update_handler
.update_builder()
.index_documents(&mut txn, &index);
builder.execute(documents_reader, |_| ())?;
}
}
txn.commit()?;

View File

@@ -3,7 +3,7 @@ use std::error::Error;
use meilisearch_error::{internal_error, Code, ErrorCode};
use serde_json::Value;
use crate::{error::MilliError, update_file_store};
use crate::error::MilliError;
pub type Result<T> = std::result::Result<T, IndexError>;
@@ -23,9 +23,7 @@ internal_error!(
IndexError: std::io::Error,
heed::Error,
fst::Error,
serde_json::Error,
update_file_store::UpdateFileStoreError,
milli::documents::Error
serde_json::Error
);
impl ErrorCode for IndexError {

View File

@@ -5,18 +5,19 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting};
use milli::update::Setting;
use milli::{obkv_to_json, FieldDistribution, FieldId};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::EnvSizer;
use super::error::IndexError;
use super::error::Result;
use super::update_handler::UpdateHandler;
use super::{Checked, Settings};
pub type Document = Map<String, Value>;
@@ -24,10 +25,8 @@ pub type Document = Map<String, Value>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub primary_key: Option<String>,
}
@@ -69,7 +68,7 @@ pub struct Index {
#[derivative(Debug = "ignore")]
pub inner: Arc<milli::Index>,
#[derivative(Debug = "ignore")]
pub indexer_config: Arc<IndexerConfig>,
pub update_handler: Arc<UpdateHandler>,
}
impl Deref for Index {
@@ -85,7 +84,7 @@ impl Index {
path: impl AsRef<Path>,
size: usize,
uuid: Uuid,
update_handler: Arc<IndexerConfig>,
update_handler: Arc<UpdateHandler>,
) -> Result<Self> {
log::debug!("opening index in {}", path.as_ref().display());
create_dir_all(&path)?;
@@ -95,7 +94,7 @@ impl Index {
Ok(Index {
inner,
uuid,
indexer_config: update_handler,
update_handler,
})
}
@@ -152,7 +151,7 @@ impl Index {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_default();
.unwrap_or_else(BTreeSet::new);
let distinct_field = self.distinct_field(txn)?.map(String::from);
// in milli each word in the synonyms map were split on their separator. Since we lost

View File

@@ -4,6 +4,7 @@ pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecke
mod dump;
pub mod error;
mod search;
pub mod update_handler;
pub mod updates;
#[allow(clippy::module_inception)]
@@ -25,7 +26,6 @@ pub mod test {
use std::path::PathBuf;
use std::sync::Arc;
use milli::update::IndexerConfig;
use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod};
use nelson::Mocker;
use serde_json::{Map, Value};
@@ -33,6 +33,7 @@ pub mod test {
use super::error::Result;
use super::index::Index;
use super::update_handler::UpdateHandler;
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
use crate::update_file_store::UpdateFileStore;
@@ -51,7 +52,7 @@ pub mod test {
path: impl AsRef<Path>,
size: usize,
uuid: Uuid,
update_handler: Arc<IndexerConfig>,
update_handler: Arc<UpdateHandler>,
) -> Result<Self> {
let index = Index::open(path, size, uuid, update_handler)?;
Ok(Self::Real(index))
@@ -61,7 +62,7 @@ pub mod test {
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
size: usize,
update_handler: &IndexerConfig,
update_handler: &UpdateHandler,
) -> anyhow::Result<()> {
Index::load_dump(src, dst, size, update_handler)
}
@@ -156,18 +157,21 @@ pub mod test {
pub fn update_documents(
&self,
method: IndexDocumentsMethod,
content_uuid: Uuid,
primary_key: Option<String>,
file_store: UpdateFileStore,
contents: impl Iterator<Item = Uuid>,
) -> Result<DocumentAdditionResult> {
match self {
MockIndex::Real(index) => {
index.update_documents(method, primary_key, file_store, contents)
index.update_documents(method, content_uuid, primary_key, file_store)
}
MockIndex::Mock(mocker) => unsafe {
mocker
.get("update_documents")
.call((method, primary_key, file_store, contents))
mocker.get("update_documents").call((
method,
content_uuid,
primary_key,
file_store,
))
},
}
}

View File

@@ -295,7 +295,7 @@ fn compute_value_matches<'a, A: AsRef<[u8]>>(
let mut start = 0;
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
if let Some(length) = matcher.matches(&token) {
if let Some(length) = matcher.matches(token.text()) {
infos.push(MatchInfo { start, length });
}
}
@@ -486,18 +486,18 @@ fn format_fields<A: AsRef<[u8]>>(
/// trait to allow unit testing of `format_fields`
trait Matcher {
fn matches(&self, w: &Token) -> Option<usize>;
fn matches(&self, w: &str) -> Option<usize>;
}
#[cfg(test)]
impl Matcher for BTreeMap<&str, Option<usize>> {
fn matches(&self, w: &Token) -> Option<usize> {
self.get(w.text()).cloned().flatten()
fn matches(&self, w: &str) -> Option<usize> {
self.get(w).cloned().flatten()
}
}
impl Matcher for MatchingWords {
fn matches(&self, w: &Token) -> Option<usize> {
fn matches(&self, w: &str) -> Option<usize> {
self.matching_bytes(w)
}
}
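For orientation, a stand-in sketch of the two `Matcher` signatures this hunk toggles between (`Token` belongs to the tokenizer crate; a trimmed stand-in is used here):

// Trimmed stand-in for the tokenizer's Token; only the text is modeled.
struct Token<'a> {
    text: &'a str,
}

// String-based matching: implementations only see the token's text.
trait StrMatcher {
    fn matches(&self, w: &str) -> Option<usize>;
}

// Token-based matching: implementations can inspect the whole token
// (text, kind, offsets) before deciding how many bytes matched.
trait TokenMatcher {
    fn matches(&self, w: &Token) -> Option<usize>;
}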
@@ -579,7 +579,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
let mut tokens = analyzed.reconstruct().peekable();
while let Some((word, token)) =
tokens.next_if(|(_, token)| matcher.matches(token).is_none())
tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none())
{
buffer.push((word, token));
}
@@ -623,7 +623,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
// Check if we need to do highlighting or compute matches before calling
// Matcher::matches, since the call is expensive.
if format_options.highlight && token.is_word() {
if let Some(length) = matcher.matches(&token) {
if let Some(length) = matcher.matches(token.text()) {
match word.get(..length).zip(word.get(length..)) {
Some((head, tail)) => {
out.push_str(&self.marks.0);
@@ -653,7 +653,7 @@ fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;
Ok(condition)
Ok(Some(condition))
}
Value::Array(arr) => parse_filter_array(arr),
v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
@@ -877,7 +877,7 @@ mod test {
assert_eq!(value["publication_year"], "<em>1937</em>");
}
/// https://github.com/meilisearch/meilisearch/issues/1368
/// https://github.com/meilisearch/MeiliSearch/issues/1368
#[test]
fn formatted_with_highlight_emoji() {
let stop_words = fst::Set::default();

View File

@@ -0,0 +1,49 @@
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;
use crate::options::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
thread_pool: ThreadPool,
log_frequency: usize,
max_memory: Option<usize>,
chunk_compression_type: CompressionType,
}
impl UpdateHandler {
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
.build()?;
Ok(Self {
max_nb_chunks: opt.max_nb_chunks,
chunk_compression_level: opt.chunk_compression_level,
thread_pool,
log_frequency: opt.log_every_n,
max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
chunk_compression_type: opt.chunk_compression_type,
})
}
pub fn update_builder(&self) -> UpdateBuilder {
// We prepare the update by using the update builder.
let mut update_builder = UpdateBuilder::new();
if let Some(max_nb_chunks) = self.max_nb_chunks {
update_builder.max_nb_chunks(max_nb_chunks);
}
if let Some(chunk_compression_level) = self.chunk_compression_level {
update_builder.chunk_compression_level(chunk_compression_level);
}
update_builder.thread_pool(&self.thread_pool);
update_builder.log_every_n(self.log_frequency);
if let Some(max_memory) = self.max_memory {
update_builder.max_memory(max_memory);
}
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder
}
}
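For orientation, this restored file is the one place where milli update settings get configured. A minimal sketch of the intended call pattern, assuming `IndexerOpts` keeps the `Default` impl shown later in this diff:

use crate::options::IndexerOpts;

fn sketch() -> anyhow::Result<()> {
    let handler = UpdateHandler::new(&IndexerOpts::default())?;
    // every index operation in this revert funnels through this builder, so the
    // thread pool, memory limit and chunk compression are decided in one place
    let _builder = handler.update_builder();
    Ok(())
}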

View File

@@ -5,8 +5,7 @@ use std::num::NonZeroUsize;
use log::{debug, info, trace};
use milli::documents::DocumentBatchReader;
use milli::update::{
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Setting,
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, Setting,
};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;
@@ -179,7 +178,7 @@ impl Index {
txn: &mut heed::RwTxn<'a, 'b>,
primary_key: String,
) -> Result<IndexMeta> {
let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref());
let mut builder = self.update_handler.update_builder().settings(txn, self);
builder.set_primary_key(primary_key);
builder.execute(|_| ())?;
let meta = IndexMeta::new_txn(self, txn)?;
@@ -198,7 +197,10 @@ impl Index {
/// Deletes `ids` from the index, and returns how many documents were deleted.
pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
let mut txn = self.write_txn()?;
let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?;
let mut builder = self
.update_handler
.update_builder()
.delete_documents(&mut txn, self)?;
// We ignore nonexistent document ids
ids.iter().for_each(|id| {
@@ -214,7 +216,11 @@ impl Index {
pub fn clear_documents(&self) -> Result<()> {
let mut txn = self.write_txn()?;
milli::update::ClearDocuments::new(&mut txn, self).execute()?;
self.update_handler
.update_builder()
.clear_documents(&mut txn, self)
.execute()?;
txn.commit()?;
Ok(())
@@ -223,9 +229,9 @@ impl Index {
pub fn update_documents(
&self,
method: IndexDocumentsMethod,
content_uuid: Uuid,
primary_key: Option<String>,
file_store: UpdateFileStore,
contents: impl IntoIterator<Item = Uuid>,
) -> Result<DocumentAdditionResult> {
trace!("performing document addition");
let mut txn = self.write_txn()?;
@@ -236,27 +242,17 @@ impl Index {
}
}
let config = IndexDocumentsConfig {
update_method: method,
..Default::default()
};
let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step);
let mut builder = milli::update::IndexDocuments::new(
&mut txn,
self,
self.indexer_config.as_ref(),
config,
indexing_callback,
);
for content_uuid in contents.into_iter() {
let content_file = file_store.get_update(content_uuid)?;
let reader = DocumentBatchReader::from_reader(content_file)?;
builder.add_documents(reader)?;
}
let content_file = file_store.get_update(content_uuid).unwrap();
let reader = DocumentBatchReader::from_reader(content_file).unwrap();
let addition = builder.execute()?;
let mut builder = self
.update_handler
.update_builder()
.index_documents(&mut txn, self);
builder.index_documents_method(method);
let addition = builder.execute(reader, indexing_callback)?;
txn.commit()?;
@@ -268,8 +264,10 @@ impl Index {
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
// We must use the write transaction of the update here.
let mut txn = self.write_txn()?;
let mut builder =
milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref());
let mut builder = self
.update_handler
.update_builder()
.settings(&mut txn, self);
apply_settings_to_builder(settings, &mut builder);

View File

@@ -3,15 +3,14 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_stream::stream;
use chrono::Utc;
use futures::{lock::Mutex, stream::StreamExt};
use log::{error, trace};
use time::macros::format_description;
use time::OffsetDateTime;
use tokio::sync::{mpsc, oneshot, RwLock};
use super::error::{DumpActorError, Result};
use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus};
use crate::tasks::Scheduler;
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
pub const CONCURRENT_DUMP_MSG: usize = 10;
@@ -19,7 +18,7 @@ pub const CONCURRENT_DUMP_MSG: usize = 10;
pub struct DumpActor {
inbox: Option<mpsc::Receiver<DumpMsg>>,
update_file_store: UpdateFileStore,
scheduler: Arc<RwLock<Scheduler>>,
task_store: TaskStore,
dump_path: PathBuf,
analytics_path: PathBuf,
lock: Arc<Mutex<()>>,
@@ -30,18 +29,14 @@ pub struct DumpActor {
/// Generate uid from creation date
fn generate_uid() -> String {
OffsetDateTime::now_utc()
.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
))
.unwrap()
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}
impl DumpActor {
pub fn new(
inbox: mpsc::Receiver<DumpMsg>,
update_file_store: UpdateFileStore,
scheduler: Arc<RwLock<Scheduler>>,
task_store: TaskStore,
dump_path: impl AsRef<Path>,
analytics_path: impl AsRef<Path>,
index_db_size: usize,
@@ -51,7 +46,7 @@ impl DumpActor {
let lock = Arc::new(Mutex::new(()));
Self {
inbox: Some(inbox),
scheduler,
task_store,
update_file_store,
dump_path: dump_path.as_ref().into(),
analytics_path: analytics_path.as_ref().into(),
@@ -123,13 +118,13 @@ impl DumpActor {
dump_path: self.dump_path.clone(),
db_path: self.analytics_path.clone(),
update_file_store: self.update_file_store.clone(),
scheduler: self.scheduler.clone(),
task_store: self.task_store.clone(),
uid: uid.clone(),
update_db_size: self.update_db_size,
index_db_size: self.index_db_size,
};
let task_result = tokio::task::spawn_local(task.run()).await;
let task_result = tokio::task::spawn(task.run()).await;
let mut dump_infos = self.dump_infos.write().await;
let dump_infos = dump_infos
@@ -159,33 +154,3 @@ impl DumpActor {
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_generate_uid() {
let current = OffsetDateTime::now_utc();
let uid = generate_uid();
let (date, time) = uid.split_once('-').unwrap();
let date = time::Date::parse(
date,
&format_description!("[year repr:full][month repr:numerical][day padding:zero]"),
)
.unwrap();
let time = time::Time::parse(
time,
&format_description!(
"[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
),
)
.unwrap();
let datetime = time::PrimitiveDateTime::new(date, time);
let datetime = datetime.assume_utc();
assert!(current - datetime < time::Duration::SECOND);
}
}
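The chrono format string restored in `generate_uid` produces the same shape the removed `time`-based test asserted. A rough illustration (timestamp invented):

fn uid_shape() {
    // e.g. at 2023-04-18 19:42:52.123 UTC this yields "20230418-194252123"
    let uid = Utc::now().format("%Y%m%d-%H%M%S%3f").to_string();
    // 8 date digits, a dash, then HHMMSS plus exactly 3 fractional digits
    assert_eq!(uid.len(), 18);
}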

View File

@@ -4,13 +4,13 @@ pub mod v3;
/// Parses the v1 version of the Asc ranking rule `asc(price)` and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(')'))
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rule `desc(price)` and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(')'))
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
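Both helpers do the same thing: strip the `asc(`/`desc(` wrapper and hand back the field name in between. A hypothetical unit test making that concrete:

#[test]
fn parses_v1_ranking_rules() {
    assert_eq!(asc_ranking_rule("asc(price)"), Some("price"));
    assert_eq!(desc_ranking_rule("desc(release_date)"), Some("release_date"));
    // anything that is not a v1 rule falls through to None
    assert_eq!(asc_ranking_rule("typo"), None);
}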

View File

@@ -1,8 +1,8 @@
use anyhow::bail;
use chrono::{DateTime, Utc};
use meilisearch_error::Code;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::index::{Settings, Unchecked};
@@ -51,8 +51,7 @@ pub enum UpdateMeta {
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
pub enqueued_at: DateTime<Utc>,
pub content: Option<Uuid>,
}
@@ -60,8 +59,7 @@ pub struct Enqueued {
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
@@ -71,8 +69,7 @@ pub struct Processed {
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub started_processing_at: OffsetDateTime,
pub started_processing_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -80,8 +77,7 @@ pub struct Processing {
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub aborted_at: OffsetDateTime,
pub aborted_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -90,8 +86,7 @@ pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
#[serde(with = "time::serde::rfc3339")]
pub failed_at: OffsetDateTime,
pub failed_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
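A note on the pattern repeated through this file: `chrono::DateTime<Utc>` already serializes to an RFC 3339 string under serde, so each explicit `#[serde(with = "time::serde::rfc3339")]` attribute can simply be dropped in the revert. A hedged round-trip sketch (struct invented for illustration):

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Stamp {
    at: DateTime<Utc>,
}

fn round_trip() -> serde_json::Result<()> {
    // serializes to something like {"at":"2022-01-01T00:00:00Z"}
    let json = serde_json::to_string(&Stamp { at: Utc::now() })?;
    let _back: Stamp = serde_json::from_str(&json)?;
    Ok(())
}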

View File

@@ -1,7 +1,7 @@
use chrono::{DateTime, Utc};
use meilisearch_error::{Code, ResponseError};
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::index::{Settings, Unchecked};
@@ -107,8 +107,7 @@ pub enum UpdateMeta {
pub struct Enqueued {
pub update_id: u64,
pub meta: Update,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
pub enqueued_at: DateTime<Utc>,
}
impl Enqueued {
@@ -123,8 +122,7 @@ impl Enqueued {
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: v2::UpdateResult,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
@@ -146,8 +144,7 @@ impl Processed {
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub started_processing_at: OffsetDateTime,
pub started_processing_at: DateTime<Utc>,
}
impl Processing {
@@ -166,8 +163,7 @@ pub struct Failed {
pub from: Processing,
pub msg: String,
pub code: Code,
#[serde(with = "time::serde::rfc3339")]
pub failed_at: OffsetDateTime,
pub failed_at: DateTime<Utc>,
}
impl Failed {

View File

@@ -1,5 +1,4 @@
use std::path::Path;
use std::sync::Arc;
use heed::EnvOpenOptions;
use log::info;
@@ -28,7 +27,7 @@ pub fn load_dump(
let mut options = EnvOpenOptions::new();
options.map_size(meta_env_size);
options.max_dbs(100);
let env = Arc::new(options.open(&dst)?);
let env = options.open(&dst)?;
IndexResolver::load_dump(
src.as_ref(),

View File

@@ -1,19 +1,16 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::bail;
use log::{info, trace};
use chrono::{DateTime, Utc};
use log::{info, trace, warn};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
pub use actor::DumpActor;
pub use handle_impl::*;
use meilisearch_auth::AuthController;
pub use message::DumpMsg;
use tempfile::TempDir;
use tokio::fs::create_dir_all;
use tokio::sync::{oneshot, RwLock};
use tokio::sync::oneshot;
use crate::analytics;
use crate::compression::{from_tar_gz, to_tar_gz};
@@ -21,7 +18,7 @@ use crate::index_controller::dump_actor::error::DumpActorError;
use crate::index_controller::dump_actor::loaders::{v2, v3, v4};
use crate::options::IndexerOpts;
use crate::tasks::task::Job;
use crate::tasks::Scheduler;
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
use error::Result;
@@ -40,8 +37,7 @@ pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
dump_date: DateTime<Utc>,
}
impl Metadata {
@@ -50,7 +46,7 @@ impl Metadata {
db_version: env!("CARGO_PKG_VERSION").to_string(),
index_db_size,
update_db_size,
dump_date: OffsetDateTime::now_utc(),
dump_date: Utc::now(),
}
}
}
@@ -83,47 +79,6 @@ pub enum MetadataVersion {
}
impl MetadataVersion {
pub fn load_dump(
self,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
match self {
MetadataVersion::V1(_meta) => {
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
}
MetadataVersion::V2(meta) => v2::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
MetadataVersion::V3(meta) => v3::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
MetadataVersion::V4(meta) => v4::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
}
Ok(())
}
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
let meta = Metadata::new(index_db_size, update_db_size);
Self::V4(meta)
@@ -145,7 +100,7 @@ impl MetadataVersion {
}
}
pub fn dump_date(&self) -> Option<&OffsetDateTime> {
pub fn dump_date(&self) -> Option<&DateTime<Utc>> {
match self {
MetadataVersion::V1(_) => None,
MetadataVersion::V2(meta) | MetadataVersion::V3(meta) | MetadataVersion::V4(meta) => {
@@ -170,13 +125,9 @@ pub struct DumpInfo {
pub status: DumpStatus,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
#[serde(with = "time::serde::rfc3339")]
started_at: OffsetDateTime,
#[serde(
skip_serializing_if = "Option::is_none",
with = "time::serde::rfc3339::option"
)]
finished_at: Option<OffsetDateTime>,
started_at: DateTime<Utc>,
#[serde(skip_serializing_if = "Option::is_none")]
finished_at: Option<DateTime<Utc>>,
}
impl DumpInfo {
@@ -185,19 +136,19 @@ impl DumpInfo {
uid,
status,
error: None,
started_at: OffsetDateTime::now_utc(),
started_at: Utc::now(),
finished_at: None,
}
}
pub fn with_error(&mut self, error: String) {
self.status = DumpStatus::Failed;
self.finished_at = Some(OffsetDateTime::now_utc());
self.finished_at = Some(Utc::now());
self.error = Some(error);
}
pub fn done(&mut self) {
self.finished_at = Some(OffsetDateTime::now_utc());
self.finished_at = Some(Utc::now());
self.status = DumpStatus::Done;
}
@@ -209,46 +160,10 @@ impl DumpInfo {
pub fn load_dump(
dst_path: impl AsRef<Path>,
src_path: impl AsRef<Path>,
ignore_dump_if_db_exists: bool,
ignore_missing_dump: bool,
index_db_size: usize,
update_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
let empty_db = crate::is_empty_db(&dst_path);
let src_path_exists = src_path.as_ref().exists();
if empty_db && src_path_exists {
let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?;
meta.load_dump(
tmp_src.path(),
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?;
persist_dump(&dst_path, tmp_dst)?;
Ok(())
} else if !empty_db && !ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
dst_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| dst_path.as_ref().to_owned())
)
} else if !src_path_exists && !ignore_missing_dump {
bail!("dump doesn't exist at {:?}", src_path.as_ref())
} else {
// there is nothing to do
Ok(())
}
}
fn extract_dump(
dst_path: impl AsRef<Path>,
src_path: impl AsRef<Path>,
) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> {
// Set up a temp directory path in the same path as the database, to prevent cross-device
// references.
let temp_path = dst_path
@@ -271,11 +186,7 @@ fn extract_dump(
let mut meta_file = File::open(&meta_path)?;
let meta: MetadataVersion = serde_json::from_reader(&mut meta_file)?;
if !dst_path.as_ref().exists() {
std::fs::create_dir_all(dst_path.as_ref())?;
}
let tmp_dst = tempfile::tempdir_in(dst_path.as_ref())?;
let tmp_dst = tempfile::tempdir()?;
info!(
"Loading dump {}, dump database version: {}, dump version: {}",
@@ -286,37 +197,43 @@ fn extract_dump(
meta.version()
);
Ok((tmp_src, tmp_dst, meta))
}
fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> {
let persisted_dump = tmp_dst.into_path();
// Delete everything in the `data.ms` except the tempdir.
if dst_path.as_ref().exists() {
for file in dst_path.as_ref().read_dir().unwrap() {
let file = file.unwrap().path();
if file.file_name() == persisted_dump.file_name() {
continue;
}
if file.is_file() {
std::fs::remove_file(&file)?;
} else {
std::fs::remove_dir_all(&file)?;
}
match meta {
MetadataVersion::V1(_meta) => {
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
}
MetadataVersion::V2(meta) => v2::load_dump(
meta,
&tmp_src_path,
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?,
MetadataVersion::V3(meta) => v3::load_dump(
meta,
&tmp_src_path,
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?,
MetadataVersion::V4(meta) => v4::load_dump(
meta,
&tmp_src_path,
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?,
}
// Persist and atomically rename the db
let persisted_dump = tmp_dst.into_path();
if dst_path.as_ref().exists() {
warn!("Overwriting database at {}", dst_path.as_ref().display());
std::fs::remove_dir_all(&dst_path)?;
}
// Move the whole content of the tempdir into the `data.ms`.
for file in persisted_dump.read_dir().unwrap() {
let file = file.unwrap().path();
std::fs::rename(&file, &dst_path.as_ref().join(file.file_name().unwrap()))?;
}
// Delete the empty tempdir.
std::fs::remove_dir_all(&persisted_dump)?;
std::fs::rename(&persisted_dump, &dst_path)?;
Ok(())
}
@@ -325,7 +242,7 @@ struct DumpJob {
dump_path: PathBuf,
db_path: PathBuf,
update_file_store: UpdateFileStore,
scheduler: Arc<RwLock<Scheduler>>,
task_store: TaskStore,
uid: String,
update_db_size: usize,
index_db_size: usize,
@@ -350,27 +267,19 @@ impl DumpJob {
let (sender, receiver) = oneshot::channel();
self.scheduler
.write()
.await
.schedule_job(Job::Dump {
self.task_store
.register_job(Job::Dump {
ret: sender,
path: temp_dump_path.clone(),
})
.await;
// wait until the job has started performing before finishing the dump process
let sender = receiver.await??;
AuthController::dump(&self.db_path, &temp_dump_path)?;
//TODO(marin): this is not right, the scheduler should dump itself, not do it here...
self.scheduler
.read()
.await
receiver.await??;
self.task_store
.dump(&temp_dump_path, self.update_file_store.clone())
.await?;
AuthController::dump(&self.db_path, &temp_dump_path)?;
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
// for now we simply copy the updates/updates_files
// FIXME: We may copy more files than necessary, if new files are added while we are
@@ -381,15 +290,23 @@ impl DumpJob {
.map_err(|e| DumpActorError::Internal(e.into()))?;
let dump_path = self.dump_path.join(self.uid).with_extension("dump");
temp_dump_file.persist(&dump_path)?;
log::info!("temp_dump_file: {:?}", temp_dump_file);
log::info!("temp dir: {:?}", temp_dump_dir);
let res = temp_dump_file.persist(&dump_path);
if let Err(error) = res {
log::info!("could not persist: {:?}", error.file);
log::info!("underlying error {}", error.error);
log::info!("underlying error (dbg) {}", error.error);
log::info!("underlying error kind {}", error.error.kind());
return Err(error.into());
}
log::info!("dump path: {}", dump_path.display());
Ok(dump_path)
})
.await??;
// notify the update loop that we are finished performing the dump.
let _ = sender.send(());
info!("Created dump in {:?}.", dump_path);
Ok(())
@@ -398,15 +315,19 @@ impl DumpJob {
#[cfg(test)]
mod test {
use std::collections::HashSet;
use futures::future::{err, ok};
use nelson::Mocker;
use once_cell::sync::Lazy;
use uuid::Uuid;
use super::*;
use crate::index::error::Result as IndexResult;
use crate::index::Index;
use crate::index_resolver::error::IndexResolverError;
use crate::options::SchedulerConfig;
use crate::tasks::error::Result as TaskResult;
use crate::tasks::task::{Task, TaskId};
use crate::tasks::{MockTaskPerformer, TaskFilter, TaskStore};
use crate::index_resolver::index_store::MockIndexStore;
use crate::index_resolver::meta_store::MockIndexMetaStore;
use crate::update_file_store::UpdateFileStore;
fn setup() {
@@ -423,91 +344,86 @@ mod test {
}
#[actix_rt::test]
#[ignore]
async fn test_dump_normal() {
setup();
let tmp = tempfile::tempdir().unwrap();
let uuids = std::iter::repeat_with(Uuid::new_v4)
.take(4)
.collect::<HashSet<_>>();
let mut uuid_store = MockIndexMetaStore::new();
uuid_store
.expect_dump()
.once()
.returning(move |_| Box::pin(ok(())));
let mut index_store = MockIndexStore::new();
index_store.expect_get().times(4).returning(move |uuid| {
let mocker = Mocker::default();
let uuids_clone = uuids.clone();
mocker.when::<(), Uuid>("uuid").once().then(move |_| {
assert!(uuids_clone.contains(&uuid));
uuid
});
mocker
.when::<&Path, IndexResult<()>>("dump")
.once()
.then(move |_| Ok(()));
Box::pin(ok(Some(Index::mock(mocker))))
});
let mocker = Mocker::default();
let update_file_store = UpdateFileStore::mock(mocker);
let mut performer = MockTaskPerformer::new();
performer
.expect_process_job()
.once()
.returning(|j| match j {
Job::Dump { ret, .. } => {
let (sender, _receiver) = oneshot::channel();
ret.send(Ok(sender)).unwrap();
}
_ => unreachable!(),
});
let performer = Arc::new(performer);
let mocker = Mocker::default();
mocker
.when::<(&Path, UpdateFileStore), TaskResult<()>>("dump")
.then(|_| Ok(()));
mocker
.when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>(
"list_tasks",
)
.then(|_| Ok(Vec::new()));
let store = TaskStore::mock(mocker);
let config = SchedulerConfig::default();
//let update_sender =
// create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
let scheduler = Scheduler::new(store, performer, config).unwrap();
//TODO: fix dump tests
let mocker = Mocker::default();
let task_store = TaskStore::mock(mocker);
let task = DumpJob {
dump_path: tmp.path().into(),
// this should do nothing
update_file_store,
db_path: tmp.path().into(),
task_store,
uid: String::from("test"),
update_db_size: 4096 * 10,
index_db_size: 4096 * 10,
scheduler,
};
task.run().await.unwrap();
}
#[actix_rt::test]
#[ignore]
async fn error_performing_dump() {
let tmp = tempfile::tempdir().unwrap();
let mut uuid_store = MockIndexMetaStore::new();
uuid_store
.expect_dump()
.once()
.returning(move |_| Box::pin(err(IndexResolverError::ExistingPrimaryKey)));
let mocker = Mocker::default();
let file_store = UpdateFileStore::mock(mocker);
let mocker = Mocker::default();
mocker
.when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>(
"list_tasks",
)
.then(|_| Ok(Vec::new()));
let task_store = TaskStore::mock(mocker);
let mut performer = MockTaskPerformer::new();
performer
.expect_process_job()
.once()
.returning(|job| match job {
Job::Dump { ret, .. } => drop(ret.send(Err(IndexResolverError::BadlyFormatted(
"blabla".to_string(),
)))),
_ => unreachable!(),
});
let performer = Arc::new(performer);
let scheduler = Scheduler::new(task_store, performer, SchedulerConfig::default()).unwrap();
let task = DumpJob {
dump_path: tmp.path().into(),
// this should do nothing
db_path: tmp.path().into(),
update_file_store: file_store,
task_store,
uid: String::from("test"),
update_db_size: 4096 * 10,
index_db_size: 4096 * 10,
scheduler,
};
assert!(task.run().await.is_err());

View File

@@ -1,4 +1,3 @@
use meilisearch_auth::SearchRules;
use std::collections::BTreeMap;
use std::fmt;
use std::io::Cursor;
@@ -8,12 +7,12 @@ use std::time::Duration;
use actix_web::error::PayloadError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use futures::StreamExt;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::sync::{mpsc, RwLock};
use tokio::sync::mpsc;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use uuid::Uuid;
@@ -23,11 +22,12 @@ use crate::index::{
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
};
use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl};
use crate::options::{IndexerOpts, SchedulerConfig};
use crate::options::IndexerOpts;
use crate::snapshot::{load_snapshot, SnapshotService};
use crate::tasks::create_task_store;
use crate::tasks::error::TaskError;
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
use crate::tasks::{Scheduler, TaskFilter, TaskStore};
use crate::tasks::{TaskFilter, TaskStore};
use error::Result;
use self::dump_actor::{DumpActorHandle, DumpInfo};
@@ -48,13 +48,6 @@ pub type Payload = Box<
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;
pub fn open_meta_env(path: &Path, size: usize) -> heed::Result<heed::Env> {
let mut options = heed::EnvOpenOptions::new();
options.map_size(size);
options.max_dbs(20);
options.open(path)
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
@@ -74,7 +67,6 @@ pub struct IndexSettings {
pub struct IndexController<U, I> {
index_resolver: Arc<IndexResolver<U, I>>,
scheduler: Arc<RwLock<Scheduler>>,
task_store: TaskStore,
dump_handle: dump_actor::DumpActorHandleImpl,
update_file_store: UpdateFileStore,
@@ -85,10 +77,9 @@ impl<U, I> Clone for IndexController<U, I> {
fn clone(&self) -> Self {
Self {
index_resolver: self.index_resolver.clone(),
scheduler: self.scheduler.clone(),
task_store: self.task_store.clone(),
dump_handle: self.dump_handle.clone(),
update_file_store: self.update_file_store.clone(),
task_store: self.task_store.clone(),
}
}
}
@@ -114,8 +105,7 @@ impl fmt::Display for DocumentAdditionFormat {
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
pub last_update: Option<OffsetDateTime>,
pub last_update: Option<DateTime<Utc>>,
pub indexes: BTreeMap<String, IndexStats>,
}
@@ -160,8 +150,6 @@ pub struct IndexControllerBuilder {
schedule_snapshot: bool,
dump_src: Option<PathBuf>,
dump_dst: Option<PathBuf>,
ignore_dump_if_db_exists: bool,
ignore_missing_dump: bool,
}
impl IndexControllerBuilder {
@@ -169,7 +157,6 @@ impl IndexControllerBuilder {
self,
db_path: impl AsRef<Path>,
indexer_options: IndexerOpts,
scheduler_config: SchedulerConfig,
) -> anyhow::Result<MeiliSearch> {
let index_size = self
.max_index_size
@@ -199,8 +186,6 @@ impl IndexControllerBuilder {
load_dump(
db_path.as_ref(),
src_path,
self.ignore_dump_if_db_exists,
self.ignore_missing_dump,
index_size,
task_store_size,
&indexer_options,
@@ -209,7 +194,11 @@ impl IndexControllerBuilder {
std::fs::create_dir_all(db_path.as_ref())?;
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
let mut options = heed::EnvOpenOptions::new();
options.map_size(task_store_size);
options.max_dbs(20);
let meta_env = options.open(&db_path)?;
let update_file_store = UpdateFileStore::new(&db_path)?;
// Create or overwrite the version file for this DB
@@ -223,9 +212,8 @@ impl IndexControllerBuilder {
update_file_store.clone(),
)?);
let task_store = TaskStore::new(meta_env)?;
let scheduler =
Scheduler::new(task_store.clone(), index_resolver.clone(), scheduler_config)?;
let task_store =
create_task_store(meta_env, index_resolver.clone()).map_err(|e| anyhow::anyhow!(e))?;
let dump_path = self
.dump_dst
@@ -236,14 +224,14 @@ impl IndexControllerBuilder {
let actor = DumpActor::new(
receiver,
update_file_store.clone(),
scheduler.clone(),
task_store.clone(),
dump_path,
analytics_path,
index_size,
task_store_size,
);
tokio::task::spawn_local(actor.run());
tokio::task::spawn(actor.run());
DumpActorHandleImpl { sender }
};
@@ -262,18 +250,17 @@ impl IndexControllerBuilder {
snapshot_path,
index_size,
meta_env_size: task_store_size,
scheduler: scheduler.clone(),
task_store: task_store.clone(),
};
tokio::task::spawn_local(snapshot_service.run());
tokio::task::spawn(snapshot_service.run());
}
Ok(IndexController {
index_resolver,
scheduler,
task_store,
dump_handle,
update_file_store,
task_store,
})
}
@@ -309,6 +296,18 @@ impl IndexControllerBuilder {
self
}
/// Set the index controller builder's dump src.
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
self.dump_src.replace(dump_src);
self
}
/// Set the index controller builder's dump dst.
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
self.dump_dst.replace(dump_dst);
self
}
/// Set the index controller builder's import snapshot.
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
self.import_snapshot.replace(import_snapshot);
@@ -326,30 +325,6 @@ impl IndexControllerBuilder {
self.schedule_snapshot = true;
self
}
/// Set the index controller builder's dump src.
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
self.dump_src.replace(dump_src);
self
}
/// Set the index controller builder's dump dst.
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
self.dump_dst.replace(dump_dst);
self
}
/// Set the index controller builder's ignore dump if db exists.
pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
self
}
/// Set the index controller builder's ignore missing dump.
pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
self.ignore_missing_dump = ignore_missing_dump;
self
}
}
impl<U, I> IndexController<U, I>
@@ -423,13 +398,12 @@ where
};
let task = self.task_store.register(uid, content).await?;
self.scheduler.read().await.notify();
Ok(task)
}
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
let task = self.scheduler.read().await.get_task(id, filter).await?;
let task = self.task_store.get_task(id, filter).await?;
Ok(task)
}
@@ -444,12 +418,7 @@ where
let mut filter = TaskFilter::default();
filter.filter_index(index_uid);
let task = self
.scheduler
.read()
.await
.get_task(task_id, Some(filter))
.await?;
let task = self.task_store.get_task(task_id, Some(filter)).await?;
Ok(task)
}
@@ -460,12 +429,7 @@ where
limit: Option<usize>,
offset: Option<TaskId>,
) -> Result<Vec<Task>> {
let tasks = self
.scheduler
.read()
.await
.list_tasks(offset, filter, limit)
.await?;
let tasks = self.task_store.list_tasks(offset, filter, limit).await?;
Ok(tasks)
}
@@ -485,9 +449,7 @@ where
filter.filter_index(index_uid);
let tasks = self
.scheduler
.read()
.await
.task_store
.list_tasks(
Some(offset.unwrap_or_default() + task_id),
Some(filter),
@@ -568,11 +530,10 @@ where
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
let last_task = self.task_store.get_processing_task().await?;
// Check if the currently indexing update is from our index.
let is_indexing = processing_tasks
.first()
.map(|task| task.index_uid.as_str() == uid)
let is_indexing = last_task
.map(|task| task.index_uid.into_inner() == uid)
.unwrap_or_default();
let index = self.index_resolver.get_index(uid).await?;
@@ -582,14 +543,17 @@ where
Ok(stats)
}
pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result<Stats> {
let mut last_task: Option<OffsetDateTime> = None;
pub async fn get_all_stats(&self, index_filter: &Option<Vec<String>>) -> Result<Stats> {
let mut last_task: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
let processing_task = self.task_store.get_processing_task().await?;
for (index_uid, index) in self.index_resolver.list().await? {
if !search_rules.is_index_authorized(&index_uid) {
if index_filter
.as_ref()
.map_or(false, |filter| !filter.contains(&index_uid))
{
continue;
}
@@ -606,8 +570,8 @@ where
});
// Check if the currently indexing update is from our index.
stats.is_indexing = processing_tasks
.first()
stats.is_indexing = processing_task
.as_ref()
.map(|p| p.index_uid.as_str() == index_uid)
.or(Some(false));
@@ -659,18 +623,16 @@ mod test {
impl IndexController<MockIndexMetaStore, MockIndexStore> {
pub fn mock(
index_resolver: Arc<IndexResolver<MockIndexMetaStore, MockIndexStore>>,
index_resolver: IndexResolver<MockIndexMetaStore, MockIndexStore>,
task_store: TaskStore,
update_file_store: UpdateFileStore,
dump_handle: DumpActorHandleImpl,
scheduler: Arc<RwLock<Scheduler>>,
) -> Self {
IndexController {
index_resolver,
index_resolver: Arc::new(index_resolver),
task_store,
dump_handle,
update_file_store,
scheduler,
}
}
}
@@ -743,27 +705,13 @@ mod test {
let task_store_mocker = nelson::Mocker::default();
let mocker = Mocker::default();
let update_file_store = UpdateFileStore::mock(mocker);
let index_resolver = Arc::new(IndexResolver::new(
uuid_store,
index_store,
update_file_store.clone(),
));
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store.clone());
let task_store = TaskStore::mock(task_store_mocker);
let scheduler = Scheduler::new(
task_store.clone(),
index_resolver.clone(),
SchedulerConfig::default(),
)
.unwrap();
// let dump_actor = MockDumpActorHandle::new();
let (sender, _) = mpsc::channel(1);
let dump_handle = DumpActorHandleImpl { sender };
let index_controller = IndexController::mock(
index_resolver,
task_store,
update_file_store,
dump_handle,
scheduler,
);
let index_controller =
IndexController::mock(index_resolver, task_store, update_file_store, dump_handle);
let r = index_controller
.search(index_uid.to_owned(), query.clone())

View File

@@ -1,14 +1,11 @@
#[derive(thiserror::Error, Debug)]
pub enum VersionFileError {
#[error(
"Meilisearch (v{}) failed to infer the version of the database. Please consider using a dump to load your data.",
env!("CARGO_PKG_VERSION").to_string()
)]
#[error("Version file is missing or the previous MeiliSearch engine version was below 0.24.0. Use a dump to update MeiliSearch.")]
MissingVersionFile,
#[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")]
#[error("Version file is corrupted and thus MeiliSearch is unable to determine the version of the database.")]
MalformedVersionFile,
#[error(
"Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. To update Meilisearch use a dump.",
"Expected MeiliSearch engine version: {major}.{minor}.{patch}, current engine version: {}. To update MeiliSearch use a dump.",
env!("CARGO_PKG_VERSION").to_string()
)]
VersionMismatch {

View File

@@ -12,7 +12,7 @@ static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
// Persists the version of the current Meilisearch binary to a VERSION file
// Persists the version of the current MeiliSearch binary to a VERSION file
pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
fs::write(
@@ -23,7 +23,7 @@ pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> {
Ok(())
}
// Ensures Meilisearch version is compatible with the database, returns an error on version mismatch.
// Ensures MeiliSearch version is compatible with the database, returns an error on version mismatch.
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);

View File

@@ -1,15 +1,14 @@
use std::collections::HashMap;
use std::convert::TryFrom;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use milli::update::IndexerConfig;
use tokio::fs;
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::error::{IndexResolverError, Result};
use crate::index::update_handler::UpdateHandler;
use crate::index::Index;
use crate::options::IndexerOpts;
@@ -27,7 +26,7 @@ pub struct MapIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
indexer_config: Arc<IndexerConfig>,
update_handler: Arc<UpdateHandler>,
}
impl MapIndexStore {
@@ -36,14 +35,14 @@ impl MapIndexStore {
index_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<Self> {
let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?);
let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Ok(Self {
index_store,
path,
index_size,
indexer_config,
update_handler,
})
}
}
@@ -64,7 +63,7 @@ impl IndexStore for MapIndexStore {
}
let index_size = self.index_size;
let update_handler = self.indexer_config.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || -> Result<Index> {
let index = Index::open(path, index_size, uuid, update_handler)?;
Ok(index)
@@ -89,7 +88,7 @@ impl IndexStore for MapIndexStore {
}
let index_size = self.index_size;
let update_handler = self.indexer_config.clone();
let update_handler = self.update_handler.clone();
let index =
spawn_blocking(move || Index::open(path, index_size, uuid, update_handler))
.await??;

View File

@@ -2,7 +2,6 @@ use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use heed::types::{SerdeBincode, Str};
use heed::{CompactionOption, Database, Env};
@@ -43,20 +42,12 @@ pub struct IndexMeta {
#[derive(Clone)]
pub struct HeedMetaStore {
env: Arc<Env>,
env: Env,
db: Database<Str, SerdeBincode<IndexMeta>>,
}
impl Drop for HeedMetaStore {
fn drop(&mut self) {
if Arc::strong_count(&self.env) == 1 {
self.env.as_ref().clone().prepare_for_closing();
}
}
}
impl HeedMetaStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: heed::Env) -> Result<Self> {
let db = env.create_database(Some("uuids"))?;
Ok(Self { env, db })
}
@@ -153,7 +144,7 @@ impl HeedMetaStore {
Ok(())
}
pub fn load_dump(src: impl AsRef<Path>, env: Arc<heed::Env>) -> Result<()> {
pub fn load_dump(src: impl AsRef<Path>, env: heed::Env) -> Result<()> {
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
let indexes = File::open(&src_indexes)?;
let mut indexes = BufReader::new(indexes);

View File

@@ -2,26 +2,26 @@ pub mod error;
pub mod index_store;
pub mod meta_store;
use std::convert::{TryFrom, TryInto};
use std::convert::TryInto;
use std::path::Path;
use std::sync::Arc;
use chrono::Utc;
use error::{IndexResolverError, Result};
use heed::Env;
use index_store::{IndexStore, MapIndexStore};
use meilisearch_error::ResponseError;
use meta_store::{HeedMetaStore, IndexMetaStore};
use milli::update::{DocumentDeletionResult, IndexerConfig};
use milli::update::DocumentDeletionResult;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::sync::oneshot;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use crate::index::update_handler::UpdateHandler;
use crate::index::{error::Result as IndexResult, Index};
use crate::options::IndexerOpts;
use crate::tasks::batch::Batch;
use crate::tasks::task::{DocumentDeletion, Job, Task, TaskContent, TaskEvent, TaskId, TaskResult};
use crate::tasks::Pending;
use crate::tasks::TaskPerformer;
use crate::update_file_store::UpdateFileStore;
@@ -39,7 +39,7 @@ pub fn create_index_resolver(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
meta_env: Arc<heed::Env>,
meta_env: heed::Env,
file_store: UpdateFileStore,
) -> anyhow::Result<HardStateIndexResolver> {
let uuid_store = HeedMetaStore::new(meta_env)?;
@@ -97,46 +97,38 @@ where
U: IndexMetaStore + Send + Sync + 'static,
I: IndexStore + Send + Sync + 'static,
{
async fn process_batch(&self, mut batch: Batch) -> Batch {
// If a batch contains multiple tasks, then it must be a document addition batch
if let Some(Task {
content: TaskContent::DocumentAddition { .. },
..
}) = batch.tasks.first()
{
debug_assert!(batch.tasks.iter().all(|t| matches!(
t,
Task {
content: TaskContent::DocumentAddition { .. },
..
}
)));
type Error = ResponseError;
self.process_document_addition_batch(batch).await
} else {
if let Some(task) = batch.tasks.first_mut() {
task.events
.push(TaskEvent::Processing(OffsetDateTime::now_utc()));
async fn process(&self, mut batch: Batch) -> Batch {
// Until batching is implemented, every batch should contain only one update.
debug_assert_eq!(batch.len(), 1);
match batch.tasks.first_mut() {
Some(Pending::Task(task)) => {
task.events.push(TaskEvent::Processing(Utc::now()));
match self.process_task(task).await {
Ok(success) => {
task.events.push(TaskEvent::Succeded {
result: success,
timestamp: OffsetDateTime::now_utc(),
timestamp: Utc::now(),
});
}
Err(err) => task.events.push(TaskEvent::Failed {
error: err.into(),
timestamp: OffsetDateTime::now_utc(),
timestamp: Utc::now(),
}),
}
}
batch
}
}
Some(Pending::Job(job)) => {
let job = std::mem::take(job);
self.process_job(job).await;
}
async fn process_job(&self, job: Job) {
self.process_job(job).await;
None => (),
}
batch
}
async fn finish(&self, batch: &Batch) {
@@ -161,15 +153,15 @@ impl IndexResolver<HeedMetaStore, MapIndexStore> {
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
env: Arc<Env>,
env: Env,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
HeedMetaStore::load_dump(&src, env)?;
let indexes_path = src.as_ref().join("indexes");
let indexes = indexes_path.read_dir()?;
let indexer_config = IndexerConfig::try_from(indexer_opts)?;
let update_handler = UpdateHandler::new(indexer_opts)?;
for index in indexes {
Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?;
Index::load_dump(&index?.path(), &dst, index_db_size, &update_handler)?;
}
Ok(())
@@ -189,100 +181,33 @@ where
}
}
async fn process_document_addition_batch(&self, mut batch: Batch) -> Batch {
fn get_content_uuid(task: &Task) -> Uuid {
match task {
Task {
content: TaskContent::DocumentAddition { content_uuid, .. },
..
} => *content_uuid,
_ => panic!("unexpected task in the document addition batch"),
}
}
let content_uuids = batch.tasks.iter().map(get_content_uuid).collect::<Vec<_>>();
match batch.tasks.first() {
Some(Task {
index_uid,
id,
content:
TaskContent::DocumentAddition {
merge_strategy,
primary_key,
allow_index_creation,
..
},
..
}) => {
let primary_key = primary_key.clone();
let method = *merge_strategy;
let index = if *allow_index_creation {
self.get_or_create_index(index_uid.clone(), *id).await
} else {
self.get_index(index_uid.as_str().to_string()).await
};
// If the index doesn't exist and we are not allowed to create it with the first
// task, we must fail the whole batch.
let now = OffsetDateTime::now_utc();
let index = match index {
Ok(index) => index,
Err(e) => {
let error = ResponseError::from(e);
for task in batch.tasks.iter_mut() {
task.events.push(TaskEvent::Failed {
error: error.clone(),
timestamp: now,
});
}
return batch;
}
};
let file_store = self.file_store.clone();
let result = spawn_blocking(move || {
index.update_documents(
method,
primary_key,
file_store,
content_uuids.into_iter(),
)
})
.await;
let event = match result {
Ok(Ok(result)) => TaskEvent::Succeded {
timestamp: OffsetDateTime::now_utc(),
result: TaskResult::DocumentAddition {
indexed_documents: result.indexed_documents,
},
},
Ok(Err(e)) => TaskEvent::Failed {
timestamp: OffsetDateTime::now_utc(),
error: e.into(),
},
Err(e) => TaskEvent::Failed {
timestamp: OffsetDateTime::now_utc(),
error: IndexResolverError::from(e).into(),
},
};
for task in batch.tasks.iter_mut() {
task.events.push(event.clone());
}
batch
}
_ => panic!("invalid batch!"),
}
}
async fn process_task(&self, task: &Task) -> Result<TaskResult> {
let index_uid = task.index_uid.clone();
match &task.content {
TaskContent::DocumentAddition { .. } => panic!("updates should be handled by batch"),
TaskContent::DocumentAddition {
content_uuid,
merge_strategy,
primary_key,
allow_index_creation,
..
} => {
let primary_key = primary_key.clone();
let content_uuid = *content_uuid;
let method = *merge_strategy;
let index = if *allow_index_creation {
self.get_or_create_index(index_uid, task.id).await?
} else {
self.get_index(index_uid.into_inner()).await?
};
let file_store = self.file_store.clone();
let result = spawn_blocking(move || {
index.update_documents(method, content_uuid, primary_key, file_store)
})
.await??;
Ok(result.into())
}
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => {
let ids = ids.clone();
let index = self.get_index(index_uid.into_inner()).await?;
@@ -358,13 +283,9 @@ where
Job::Dump { ret, path } => {
log::trace!("The Dump task is getting executed");
let (sender, receiver) = oneshot::channel();
if ret.send(self.dump(path).await.map(|_| sender)).is_err() {
if ret.send(self.dump(path).await).is_err() {
log::error!("The dump actor died.");
}
// wait until the dump has finished performing.
let _ = receiver.await;
}
Job::Empty => log::error!("Tried to process an empty task."),
Job::Snapshot(job) => {
@@ -484,7 +405,7 @@ where
#[cfg(test)]
mod test {
use std::{collections::BTreeMap, vec::IntoIter};
use std::collections::BTreeMap;
use super::*;
@@ -525,9 +446,9 @@ mod test {
};
if primary_key.is_some() {
mocker.when::<String, IndexResult<IndexMeta>>("update_primary_key")
.then(move |_| Ok(IndexMeta{ created_at: OffsetDateTime::now_utc(), updated_at: OffsetDateTime::now_utc(), primary_key: None }));
.then(move |_| Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None }));
}
mocker.when::<(IndexDocumentsMethod, Option<String>, UpdateFileStore, IntoIter<Uuid>), IndexResult<DocumentAdditionResult>>("update_documents")
mocker.when::<(IndexDocumentsMethod, Uuid, Option<String>, UpdateFileStore), IndexResult<DocumentAdditionResult>>("update_documents")
.then(move |(_, _, _, _)| result());
}
TaskContent::SettingsUpdate{..} => {
@@ -542,13 +463,13 @@ mod test {
}
TaskContent::DocumentDeletion(DocumentDeletion::Ids(_ids)) => {
let result = move || if !index_op_fails {
Ok(DocumentDeletionResult { deleted_documents: any_int as u64, remaining_documents: any_int as u64 })
Ok(any_int as u64)
} else {
// return this error because it's easy to generate...
Err(IndexError::DocumentNotFound("a doc".into()))
};
mocker.when::<&[String], IndexResult<DocumentDeletionResult>>("delete_documents")
mocker.when::<&[String], IndexResult<u64>>("delete_documents")
.then(move |_| result());
},
TaskContent::DocumentDeletion(DocumentDeletion::Clear) => {
@@ -570,7 +491,7 @@ mod test {
| TaskContent::IndexCreation { primary_key } => {
if primary_key.is_some() {
let result = move || if !index_op_fails {
Ok(IndexMeta{ created_at: OffsetDateTime::now_utc(), updated_at: OffsetDateTime::now_utc(), primary_key: None })
Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None })
} else {
// return this error because it's easy to generate...
Err(IndexError::DocumentNotFound("a doc".into()))
@@ -641,8 +562,7 @@ mod test {
let update_file_store = UpdateFileStore::mock(mocker);
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store);
let batch = Batch { id: 1, created_at: OffsetDateTime::now_utc(), tasks: vec![task.clone()] };
let result = index_resolver.process_batch(batch).await;
let result = index_resolver.process_task(&task).await;
// Test for some expected output scenarios:
// Index creation and deletion cannot fail because of a failed index op, since they
@@ -656,9 +576,9 @@ mod test {
| TaskContent::DocumentAddition { allow_index_creation: false, ..}
| TaskContent::IndexUpdate { .. } ))
{
assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Failed { .. }), "{:?}", result);
assert!(result.is_err(), "{:?}", result);
} else {
assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Succeded { .. }), "{:?}", result);
assert!(result.is_ok(), "{:?}", result);
}
});
}

View File

@@ -10,8 +10,6 @@ mod snapshot;
pub mod tasks;
mod update_file_store;
use std::path::Path;
pub use index_controller::MeiliSearch;
pub use milli;
@@ -35,19 +33,3 @@ impl EnvSizer for heed::Env {
.fold(0, |acc, m| acc + m.len())
}
}
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non-empty when it's a non-empty directory.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
let db_path = db_path.as_ref();
if !db_path.exists() {
true
// if we encounter an error or if the db is a file, we consider the db non-empty
} else if let Ok(dir) = db_path.read_dir() {
dir.count() == 0
} else {
true
}
}
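This guard (dropped by the revert) is what dump and snapshot loading used to decide whether an import is safe; its call site earlier in this diff reads:

let empty_db = crate::is_empty_db(&dst_path);
let src_path_exists = src_path.as_ref().exists();
if empty_db && src_path_exists {
    // the destination is safe to import the dump into
}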

View File

@@ -1,21 +1,20 @@
use core::fmt;
use std::{convert::TryFrom, ops::Deref, str::FromStr};
use std::{ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use milli::{update::IndexerConfig, CompressionType};
use serde::Serialize;
use milli::CompressionType;
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, Parser)]
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The number of documents to skip before printing
/// a log regarding the indexing progress.
#[clap(long, default_value = "100000")] // 100k
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[clap(long)]
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
@@ -25,71 +24,25 @@ pub struct IndexerOpts {
/// In case the engine is unable to retrieve the available memory, the engine will
/// try to use the memory it needs without a real limit; this can lead to
/// Out-Of-Memory issues, and it is recommended to specify the amount of memory to use.
#[clap(long, default_value_t)]
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[clap(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[clap(long, requires = "chunk-compression-type")]
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[clap(long)]
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
pub struct SchedulerConfig {
/// enable the autobatching experimental feature
#[clap(long, hide = true)]
pub enable_auto_batching: bool,
// The maximum number of updates of the same type that can be batched together.
// If unspecified, this is unlimited. A value of 0 is interpreted as 1.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub max_batch_size: Option<usize>,
// The maximum number of documents in a document batch. Since batches must contain at least one
// update for the scheduler to make progress, the number of documents in a batch will be at
// least the number of documents of its first update.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub max_documents_per_batch: Option<usize>,
/// Debounce duration in seconds
///
/// When a new task is enqueued, the scheduler waits for `debounce_duration_sec` seconds for new updates before
/// starting to process a batch of updates.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub debounce_duration_sec: Option<u64>,
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2))
.build()?;
Ok(Self {
log_every_n: Some(other.log_every_n),
max_nb_chunks: other.max_nb_chunks,
max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize),
chunk_compression_type: other.chunk_compression_type,
chunk_compression_level: other.chunk_compression_level,
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
..Default::default()
})
}
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
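The `MaxMemory` default documented above (two thirds of the machine's memory) deserves spelling out. A hedged sketch, assuming the `sysinfo` items imported at the top of this file behave as in the 0.2x releases this code targets:

use byte_unit::Byte;
use sysinfo::{RefreshKind, System, SystemExt};

/// Two thirds of the machine's total memory.
fn default_max_memory() -> Byte {
    let mut system = System::new_with_specifics(RefreshKind::new().with_memory());
    system.refresh_memory();
    // these sysinfo versions report total memory in kilobytes
    Byte::from_bytes(u128::from(system.total_memory()) * 1024 * 2 / 3)
}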

View File

@@ -1,21 +1,17 @@
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::bail;
use fs_extra::dir::{self, CopyOptions};
use log::{info, trace};
use meilisearch_auth::open_auth_store_env;
use tokio::sync::RwLock;
use tokio::time::sleep;
use walkdir::WalkDir;
use crate::compression::from_tar_gz;
use crate::index_controller::open_meta_env;
use crate::index_controller::versioning::VERSION_FILE_NAME;
use crate::tasks::task::Job;
use crate::tasks::Scheduler;
use crate::tasks::TaskStore;
pub struct SnapshotService {
pub(crate) db_path: PathBuf,
@@ -23,7 +19,7 @@ pub struct SnapshotService {
pub(crate) snapshot_path: PathBuf,
pub(crate) index_size: usize,
pub(crate) meta_env_size: usize,
pub(crate) scheduler: Arc<RwLock<Scheduler>>,
pub(crate) task_store: TaskStore,
}
impl SnapshotService {
@@ -40,7 +36,8 @@ impl SnapshotService {
index_size: self.index_size,
};
let job = Job::Snapshot(snapshot_job);
self.scheduler.write().await.schedule_job(job).await;
self.task_store.register_job(job).await;
sleep(self.snapshot_period).await;
}
}
@@ -52,10 +49,7 @@ pub fn load_snapshot(
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
let empty_db = crate::is_empty_db(&db_path);
let snapshot_path_exists = snapshot_path.as_ref().exists();
if empty_db && snapshot_path_exists {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
@@ -64,7 +58,7 @@ pub fn load_snapshot(
Err(e)
}
}
} else if !empty_db && !ignore_snapshot_if_db_exists {
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
db_path
@@ -72,8 +66,14 @@ pub fn load_snapshot(
.canonicalize()
.unwrap_or_else(|_| db_path.as_ref().to_owned())
)
} else if !snapshot_path_exists && !ignore_missing_snapshot {
bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref())
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
bail!(
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
)
} else {
Ok(())
}
@@ -146,7 +146,9 @@ impl SnapshotJob {
}
fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> {
let env = open_meta_env(&self.src_path, self.meta_env_size)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(self.meta_env_size);
let env = options.open(&self.src_path)?;
let dst = path.join("data.mdb");
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
@@ -182,10 +184,9 @@ impl SnapshotJob {
let mut options = heed::EnvOpenOptions::new();
options.map_size(self.index_size);
let index = milli::Index::new(options, entry.path())?;
index
.env
.copy_to_path(dst, heed::CompactionOption::Enabled)?;
let env = options.open(entry.path())?;
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
}
Ok(())
@@ -197,7 +198,9 @@ impl SnapshotJob {
std::fs::create_dir_all(&dst)?;
let dst = dst.join("data.mdb");
let env = open_auth_store_env(&auth_path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(1_073_741_824);
let env = options.open(auth_path)?;
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
Ok(())
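Each snapshot step above follows the same pattern: open the LMDB environment with the right map size, then write a compacted copy into the snapshot directory. A condensed sketch of that pattern, assuming the `heed` and `anyhow` crates as used in this file (`snapshot_env` is a hypothetical helper):

use std::path::Path;

fn snapshot_env(src: &Path, dst_dir: &Path, map_size: usize) -> anyhow::Result<()> {
    let mut options = heed::EnvOpenOptions::new();
    options.map_size(map_size);
    let env = options.open(src)?;
    // `CompactionOption::Enabled` skips free pages, so the copy is usually
    // much smaller than the live data.mdb file.
    env.copy_to_path(dst_dir.join("data.mdb"), heed::CompactionOption::Enabled)?;
    Ok(())
}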

View File

@@ -1,14 +1,14 @@
use time::OffsetDateTime;
use chrono::{DateTime, Utc};
use super::task::Task;
use super::{task::Task, task_store::Pending};
pub type BatchId = u64;
pub type BatchId = u32;
#[derive(Debug)]
pub struct Batch {
pub id: BatchId,
pub created_at: OffsetDateTime,
pub tasks: Vec<Task>,
pub created_at: DateTime<Utc>,
pub tasks: Vec<Pending<Task>>,
}
impl Batch {

View File

@@ -1,38 +1,47 @@
use async_trait::async_trait;
use std::sync::Arc;
use std::time::Duration;
pub use scheduler::Scheduler;
pub use task_store::TaskFilter;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
#[cfg(test)]
pub use task_store::test::MockTaskStore as TaskStore;
#[cfg(not(test))]
pub use task_store::TaskStore;
pub use task_store::{Pending, TaskFilter};
use batch::Batch;
use error::Result;
use self::task::Job;
use scheduler::Scheduler;
pub mod batch;
pub mod error;
mod scheduler;
pub mod scheduler;
pub mod task;
mod task_store;
pub mod update_loop;
#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))]
#[async_trait]
pub trait TaskPerformer: Sync + Send + 'static {
type Error: Serialize + for<'de> Deserialize<'de> + std::error::Error + Sync + Send + 'static;
/// Processes the `Task` batch, returning the batch with the tasks updated.
async fn process_batch(&self, batch: Batch) -> Batch;
async fn process_job(&self, job: Job);
async fn process(&self, batch: Batch) -> Batch;
/// `finish` is called when the result of `process` has been committed to the task store. This
/// method can be used to perform cleanup after the update has been completed, for example.
async fn finish(&self, batch: &Batch);
}
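For reference, a minimal no-op implementation against the `process_batch`/`process_job`/`finish` shape of the trait (one of the two variants visible in this diff); it assumes the `Batch` and `Job` types from this module and the `async_trait` crate:

struct NoopPerformer;

#[async_trait]
impl TaskPerformer for NoopPerformer {
    // A real performer would execute each task and push a success or failure
    // event onto it; this sketch returns the batch untouched.
    async fn process_batch(&self, batch: Batch) -> Batch {
        batch
    }

    async fn process_job(&self, _job: Job) {}

    async fn finish(&self, _batch: &Batch) {}
}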
pub fn create_task_store<P>(env: heed::Env, performer: Arc<P>) -> Result<TaskStore>
where
P: TaskPerformer,
{
let task_store = TaskStore::new(env)?;
let scheduler = Scheduler::new(task_store.clone(), performer, Duration::from_millis(1));
tokio::task::spawn_local(scheduler.run());
Ok(task_store)
}
#[cfg(test)]
mod test {
use serde::{Deserialize, Serialize};

View File

@@ -1,526 +1,253 @@
use std::cmp::Ordering;
use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque};
use std::ops::{Deref, DerefMut};
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use atomic_refcell::AtomicRefCell;
use milli::update::IndexDocumentsMethod;
use time::OffsetDateTime;
use tokio::sync::{watch, RwLock};
use crate::options::SchedulerConfig;
use crate::update_file_store::UpdateFileStore;
use chrono::Utc;
use serde::{Deserialize, Serialize};
use super::batch::Batch;
use super::error::Result;
use super::task::{Job, Task, TaskContent, TaskEvent, TaskId};
use super::update_loop::UpdateLoop;
use super::{TaskFilter, TaskPerformer, TaskStore};
#[derive(Eq, Debug, Clone, Copy)]
enum TaskType {
DocumentAddition { number: usize },
DocumentUpdate { number: usize },
Other,
}
/// Two tasks are equal if they have the same type.
impl PartialEq for TaskType {
fn eq(&self, other: &Self) -> bool {
matches!(
(self, other),
(Self::DocumentAddition { .. }, Self::DocumentAddition { .. })
| (Self::DocumentUpdate { .. }, Self::DocumentUpdate { .. })
)
}
}
#[derive(Eq, Debug, Clone, Copy)]
struct PendingTask {
kind: TaskType,
id: TaskId,
}
impl PartialEq for PendingTask {
fn eq(&self, other: &Self) -> bool {
self.id.eq(&other.id)
}
}
impl PartialOrd for PendingTask {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for PendingTask {
fn cmp(&self, other: &Self) -> Ordering {
self.id.cmp(&other.id).reverse()
}
}
#[derive(Debug)]
struct TaskList {
index: String,
tasks: BinaryHeap<PendingTask>,
}
impl Deref for TaskList {
type Target = BinaryHeap<PendingTask>;
fn deref(&self) -> &Self::Target {
&self.tasks
}
}
impl DerefMut for TaskList {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.tasks
}
}
impl TaskList {
fn new(index: String) -> Self {
Self {
index,
tasks: Default::default(),
}
}
}
impl PartialEq for TaskList {
fn eq(&self, other: &Self) -> bool {
self.index == other.index
}
}
impl Eq for TaskList {}
impl Ord for TaskList {
fn cmp(&self, other: &Self) -> Ordering {
match (self.peek(), other.peek()) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(lhs), Some(rhs)) => lhs.cmp(rhs),
}
}
}
impl PartialOrd for TaskList {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
#[derive(Default)]
struct TaskQueue {
/// Maps index uids to their TaskList, for quick access
index_tasks: HashMap<String, Arc<AtomicRefCell<TaskList>>>,
/// A queue that orders TaskList by the priority of their first update
queue: BinaryHeap<Arc<AtomicRefCell<TaskList>>>,
}
impl TaskQueue {
fn insert(&mut self, task: Task) {
let uid = task.index_uid.into_inner();
let id = task.id;
let kind = match task.content {
TaskContent::DocumentAddition {
documents_count,
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
..
} => TaskType::DocumentAddition {
number: documents_count,
},
TaskContent::DocumentAddition {
documents_count,
merge_strategy: IndexDocumentsMethod::UpdateDocuments,
..
} => TaskType::DocumentUpdate {
number: documents_count,
},
_ => TaskType::Other,
};
let task = PendingTask { kind, id };
match self.index_tasks.entry(uid) {
Entry::Occupied(entry) => {
// A task list already exists for this index, all we have to do is push the new
// update to the end of the list. This won't change the order since ids are
// monotonically increasing.
let mut list = entry.get().borrow_mut();
// We only need the first element to be lower than the one we want to
// insert to preserve the order in the queue.
assert!(list.peek().map(|old_id| id >= old_id.id).unwrap_or(true));
list.push(task);
}
Entry::Vacant(entry) => {
let mut task_list = TaskList::new(entry.key().to_owned());
task_list.push(task);
let task_list = Arc::new(AtomicRefCell::new(task_list));
entry.insert(task_list.clone());
self.queue.push(task_list);
}
}
}
/// Passes a mutable view of the task list for the next index to schedule to the given
/// callback. It is guaranteed that the first id in that task list will be the lowest pending task id.
fn head_mut<R>(&mut self, mut f: impl FnMut(&mut TaskList) -> R) -> Option<R> {
let head = self.queue.pop()?;
let result = {
let mut ref_head = head.borrow_mut();
f(&mut *ref_head)
};
if !head.borrow().tasks.is_empty() {
// After being mutated, the head is reinserted to the correct position.
self.queue.push(head);
} else {
self.index_tasks.remove(&head.borrow().index);
}
Some(result)
}
pub fn is_empty(&self) -> bool {
self.queue.is_empty() && self.index_tasks.is_empty()
}
}
pub struct Scheduler {
jobs: VecDeque<Job>,
tasks: TaskQueue,
#[cfg(test)]
use super::task_store::test::MockTaskStore as TaskStore;
use super::task_store::Pending;
#[cfg(not(test))]
use super::task_store::TaskStore;
use super::TaskPerformer;
use crate::tasks::task::TaskEvent;
/// The scheduler's role is to perform batches of tasks one at a time. It will monitor the TaskStore
/// for new tasks, put them in a batch, and process the batch as soon as possible.
///
/// When a batch is currently processing, the scheduler is just waiting.
pub struct Scheduler<P: TaskPerformer> {
store: TaskStore,
processing: Vec<TaskId>,
next_fetched_task_id: TaskId,
config: SchedulerConfig,
/// Notifies the update loop that a new task was received
notifier: watch::Sender<()>,
performer: Arc<P>,
/// The interval at which the `TaskStore` should be checked for new updates
task_store_check_interval: Duration,
}
impl Scheduler {
pub fn new<P>(
store: TaskStore,
performer: Arc<P>,
mut config: SchedulerConfig,
) -> Result<Arc<RwLock<Self>>>
where
P: TaskPerformer,
{
let (notifier, rcv) = watch::channel(());
let debounce_time = config.debounce_duration_sec;
// Disable autobatching
if !config.enable_auto_batching {
config.max_batch_size = Some(1);
}
let this = Self {
jobs: VecDeque::new(),
tasks: TaskQueue::default(),
impl<P> Scheduler<P>
where
P: TaskPerformer + Send + Sync + 'static,
P::Error: Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static,
{
pub fn new(store: TaskStore, performer: Arc<P>, task_store_check_interval: Duration) -> Self {
Self {
store,
processing: Vec::new(),
next_fetched_task_id: 0,
config,
notifier,
};
// Notify update loop to start processing pending updates immediately after startup.
this.notify();
let this = Arc::new(RwLock::new(this));
let update_loop = UpdateLoop::new(
this.clone(),
performer,
debounce_time.filter(|&v| v > 0).map(Duration::from_secs),
rcv,
);
tokio::task::spawn_local(update_loop.run());
Ok(this)
}
pub async fn dump(&self, path: &Path, file_store: UpdateFileStore) -> Result<()> {
self.store.dump(path, file_store).await
}
fn register_task(&mut self, task: Task) {
assert!(!task.is_finished());
self.tasks.insert(task);
}
/// Clears the processing list; this method should be called when the processing of a batch is finished.
pub fn finish(&mut self) {
self.processing.clear();
}
pub fn notify(&self) {
let _ = self.notifier.send(());
}
fn notify_if_not_empty(&self) {
if !self.jobs.is_empty() || !self.tasks.is_empty() {
self.notify();
task_store_check_interval,
}
}
pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> {
self.store.update_tasks(tasks).await
}
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
self.store.get_task(id, filter).await
}
pub async fn list_tasks(
&self,
offset: Option<TaskId>,
filter: Option<TaskFilter>,
limit: Option<usize>,
) -> Result<Vec<Task>> {
self.store.list_tasks(offset, filter, limit).await
}
pub async fn get_processing_tasks(&self) -> Result<Vec<Task>> {
let mut tasks = Vec::new();
for id in self.processing.iter() {
let task = self.store.get_task(*id, None).await?;
tasks.push(task);
pub async fn run(self) {
loop {
if let Err(e) = self.process_next_batch().await {
log::error!("an error occured while processing an update batch: {}", e);
}
}
Ok(tasks)
}
pub async fn schedule_job(&mut self, job: Job) {
self.jobs.push_back(job);
self.notify();
}
async fn process_next_batch(&self) -> Result<()> {
match self.prepare_batch().await? {
Some(mut batch) => {
for task in &mut batch.tasks {
match task {
Pending::Task(task) => task.events.push(TaskEvent::Processing(Utc::now())),
Pending::Job(_) => (),
}
}
async fn fetch_pending_tasks(&mut self) -> Result<()> {
// We must NEVER re-enqueue an already processed task! Its content uuid would point to a nonexistent file.
//
// TODO(marin): This may create some latency when the first batch lazy loads the pending updates.
let mut filter = TaskFilter::default();
filter.filter_fn(|task| !task.is_finished());
// the jobs are ignored
batch.tasks = self.store.update_tasks(batch.tasks).await?;
self.store
.list_tasks(Some(self.next_fetched_task_id), Some(filter), None)
.await?
.into_iter()
// The tasks arrive in reverse order, and we need to insert them in order.
.rev()
.for_each(|t| {
self.next_fetched_task_id = t.id + 1;
self.register_task(t);
});
let performer = self.performer.clone();
let batch_result = performer.process(batch).await;
self.handle_batch_result(batch_result).await?;
}
None => {
// No update found to create a batch; we wait a bit before we retry.
tokio::time::sleep(self.task_store_check_interval).await;
}
}
Ok(())
}
/// Prepare the next batch, and set `processing` to the ids in that batch.
pub async fn prepare(&mut self) -> Result<Pending> {
// If there is a job to process, do it first.
if let Some(job) = self.jobs.pop_front() {
// There is more work to do, notify the update loop
self.notify_if_not_empty();
return Ok(Pending::Job(job));
}
// Try to fill the queue with pending tasks.
self.fetch_pending_tasks().await?;
/// Checks for pending tasks and groups them in a batch. If there are no pending updates,
/// returns `Ok(None)`.
///
/// Until batching is properly implemented, the batches contain only one task.
async fn prepare_batch(&self) -> Result<Option<Batch>> {
match self.store.peek_pending_task().await {
Some(Pending::Task(next_task_id)) => {
let mut task = self.store.get_task(next_task_id, None).await?;
make_batch(&mut self.tasks, &mut self.processing, &self.config);
task.events.push(TaskEvent::Batched {
timestamp: Utc::now(),
batch_id: 0,
});
log::debug!("prepared batch with {} tasks", self.processing.len());
if !self.processing.is_empty() {
let ids = std::mem::take(&mut self.processing);
let (ids, mut tasks) = self.store.get_pending_tasks(ids).await?;
// The batch id is the id of the first update it contains
let id = match tasks.first() {
Some(Task { id, .. }) => *id,
_ => panic!("invalid batch"),
};
tasks.iter_mut().for_each(|t| {
t.events.push(TaskEvent::Batched {
batch_id: id,
timestamp: OffsetDateTime::now_utc(),
})
});
self.processing = ids;
let batch = Batch {
id,
created_at: OffsetDateTime::now_utc(),
tasks,
};
// There is more work to do, notify the update loop
self.notify_if_not_empty();
Ok(Pending::Batch(batch))
} else {
Ok(Pending::Nothing)
let batch = Batch {
id: 0,
// index_uid: task.index_uid.clone(),
created_at: Utc::now(),
tasks: vec![Pending::Task(task)],
};
Ok(Some(batch))
}
Some(Pending::Job(job)) => Ok(Some(Batch {
id: 0,
created_at: Utc::now(),
tasks: vec![Pending::Job(job)],
})),
None => Ok(None),
}
}
}
#[derive(Debug)]
pub enum Pending {
Batch(Batch),
Job(Job),
Nothing,
}
fn make_batch(tasks: &mut TaskQueue, processing: &mut Vec<TaskId>, config: &SchedulerConfig) {
processing.clear();
let mut doc_count = 0;
tasks.head_mut(|list| match list.peek().copied() {
Some(PendingTask {
kind: TaskType::Other,
id,
}) => {
processing.push(id);
list.pop();
}
Some(PendingTask { kind, .. }) => loop {
match list.peek() {
Some(pending) if pending.kind == kind => {
// We always need to process at least one task for the scheduler to make progress.
if processing.len() >= config.max_batch_size.unwrap_or(usize::MAX).max(1) {
break;
}
let pending = list.pop().unwrap();
processing.push(pending.id);
// We add the number of documents to the count if we are scheduling document additions and
// stop adding if we already have enough.
//
// We check that bound only after adding the current task to the batch, so that a batch contains at least one task.
match pending.kind {
TaskType::DocumentUpdate { number }
| TaskType::DocumentAddition { number } => {
doc_count += number;
if doc_count >= config.max_documents_per_batch.unwrap_or(usize::MAX) {
break;
}
}
_ => (),
}
}
_ => break,
}
},
None => (),
});
/// Handles the result from processing a batch.
///
/// When a task is processed, the result of the processing is pushed to its event list.
/// `handle_batch_result` makes sure that the new state is saved into its store.
/// The tasks are then removed from the processing queue.
async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> {
let tasks = self.store.update_tasks(batch.tasks).await?;
batch.tasks = tasks;
self.store.delete_pending(&batch.tasks[0]).await;
self.performer.finish(&batch).await;
Ok(())
}
}
#[cfg(test)]
mod test {
use milli::update::IndexDocumentsMethod;
use uuid::Uuid;
use nelson::Mocker;
use crate::{index_resolver::IndexUid, tasks::task::TaskContent};
use crate::index_resolver::IndexUid;
use crate::tasks::task::Task;
use crate::tasks::task_store::TaskFilter;
use super::super::task::{TaskContent, TaskEvent, TaskId, TaskResult};
use super::super::MockTaskPerformer;
use super::*;
fn gen_task(id: TaskId, index_uid: &str, content: TaskContent) -> Task {
Task {
id,
index_uid: IndexUid::new_unchecked(index_uid),
content,
events: vec![],
#[tokio::test]
async fn test_prepare_batch_full() {
let mocker = Mocker::default();
mocker
.when::<(TaskId, Option<TaskFilter>), Result<Option<Task>>>("get_task")
.once()
.then(|(id, _filter)| {
let task = Task {
id,
index_uid: IndexUid::new("Test".to_string()).unwrap(),
content: TaskContent::IndexDeletion,
events: vec![TaskEvent::Created(Utc::now())],
};
Ok(Some(task))
});
mocker
.when::<(), Option<Pending<TaskId>>>("peek_pending_task")
.then(|()| Some(Pending::Task(1)));
let store = TaskStore::mock(mocker);
let performer = Arc::new(MockTaskPerformer::new());
let scheduler = Scheduler {
store,
performer,
task_store_check_interval: Duration::from_millis(1),
};
let batch = scheduler.prepare_batch().await.unwrap().unwrap();
assert_eq!(batch.tasks.len(), 1);
assert!(
matches!(batch.tasks[0], Pending::Task(Task { id: 1, .. })),
"{:?}",
batch.tasks[0]
);
}
#[tokio::test]
async fn test_prepare_batch_empty() {
let mocker = Mocker::default();
mocker
.when::<(), Option<Pending<TaskId>>>("peek_pending_task")
.then(|()| None);
let store = TaskStore::mock(mocker);
let performer = Arc::new(MockTaskPerformer::new());
let scheduler = Scheduler {
store,
performer,
task_store_check_interval: Duration::from_millis(1),
};
assert!(scheduler.prepare_batch().await.unwrap().is_none());
}
#[tokio::test]
async fn test_loop_run_normal() {
let mocker = Mocker::default();
let mut id = Some(1);
mocker
.when::<(), Option<Pending<TaskId>>>("peek_pending_task")
.then(move |()| id.take().map(Pending::Task));
mocker
.when::<(TaskId, Option<TaskFilter>), Result<Task>>("get_task")
.once()
.then(|(id, _)| {
let task = Task {
id,
index_uid: IndexUid::new("Test".to_string()).unwrap(),
content: TaskContent::IndexDeletion,
events: vec![TaskEvent::Created(Utc::now())],
};
Ok(task)
});
mocker
.when::<Vec<Pending<Task>>, Result<Vec<Pending<Task>>>>("update_tasks")
.times(2)
.then(|tasks| {
assert_eq!(tasks.len(), 1);
Ok(tasks)
});
mocker.when::<(), ()>("delete_pending").once().then(|_| ());
let store = TaskStore::mock(mocker);
let mut performer = MockTaskPerformer::new();
performer.expect_process().once().returning(|mut batch| {
batch.tasks.iter_mut().for_each(|t| match t {
Pending::Task(Task { ref mut events, .. }) => events.push(TaskEvent::Succeded {
result: TaskResult::Other,
timestamp: Utc::now(),
}),
_ => panic!("expected a task, found a job"),
});
batch
});
performer.expect_finish().once().returning(|_| ());
let performer = Arc::new(performer);
let scheduler = Scheduler {
store,
performer,
task_store_check_interval: Duration::from_millis(1),
};
let handle = tokio::spawn(scheduler.run());
if let Ok(r) = tokio::time::timeout(Duration::from_millis(100), handle).await {
r.unwrap();
}
}
#[test]
fn register_updates_multiples_indexes() {
let mut queue = TaskQueue::default();
queue.insert(gen_task(0, "test1", TaskContent::IndexDeletion));
queue.insert(gen_task(1, "test2", TaskContent::IndexDeletion));
queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion));
queue.insert(gen_task(3, "test2", TaskContent::IndexDeletion));
queue.insert(gen_task(4, "test1", TaskContent::IndexDeletion));
queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion));
queue.insert(gen_task(6, "test2", TaskContent::IndexDeletion));
let test1_tasks = queue
.head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>())
.unwrap();
assert_eq!(test1_tasks, &[0, 4, 5]);
let test2_tasks = queue
.head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>())
.unwrap();
assert_eq!(test2_tasks, &[1, 2, 3, 6]);
assert!(queue.index_tasks.is_empty());
assert!(queue.queue.is_empty());
}
#[test]
fn test_make_batch() {
let mut queue = TaskQueue::default();
let content = TaskContent::DocumentAddition {
content_uuid: Uuid::new_v4(),
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
primary_key: Some("test".to_string()),
documents_count: 0,
allow_index_creation: true,
};
queue.insert(gen_task(0, "test1", content.clone()));
queue.insert(gen_task(1, "test2", content.clone()));
queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion));
queue.insert(gen_task(3, "test2", content.clone()));
queue.insert(gen_task(4, "test1", content.clone()));
queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion));
queue.insert(gen_task(6, "test2", content.clone()));
queue.insert(gen_task(7, "test1", content));
let mut batch = Vec::new();
let config = SchedulerConfig::default();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[0, 4]);
batch.clear();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[1]);
batch.clear();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[2]);
batch.clear();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[3, 6]);
batch.clear();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[5]);
batch.clear();
make_batch(&mut queue, &mut batch, &config);
assert_eq!(batch, &[7]);
assert!(queue.is_empty());
}
}

View File

@@ -1,9 +1,9 @@
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use meilisearch_error::ResponseError;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::sync::oneshot;
use uuid::Uuid;
@@ -36,37 +36,26 @@ impl From<DocumentAdditionResult> for TaskResult {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum TaskEvent {
Created(
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
#[serde(with = "time::serde::rfc3339")]
OffsetDateTime,
),
Created(#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] DateTime<Utc>),
Batched {
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
timestamp: DateTime<Utc>,
batch_id: BatchId,
},
Processing(
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
#[serde(with = "time::serde::rfc3339")]
OffsetDateTime,
),
Processing(#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] DateTime<Utc>),
Succeded {
result: TaskResult,
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
timestamp: DateTime<Utc>,
},
Failed {
error: ResponseError,
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
timestamp: DateTime<Utc>,
},
}
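The `#[serde(with = "time::serde::rfc3339")]` attributes above serialize each `OffsetDateTime` as an RFC 3339 string instead of the crate's default representation. A standalone example of the same attribute, assuming the `time` crate with its `serde-well-known` feature plus `serde` and `serde_json`:

use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

#[derive(Serialize, Deserialize)]
struct Stamped {
    #[serde(with = "time::serde::rfc3339")]
    timestamp: OffsetDateTime,
}

fn main() -> serde_json::Result<()> {
    let stamped = Stamped { timestamp: OffsetDateTime::now_utc() };
    // Prints something like: {"timestamp":"2023-04-18T19:42:52Z"}
    println!("{}", serde_json::to_string(&stamped)?);
    Ok(())
}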
/// A task represents an operation that Meilisearch must do.
/// A task represents an operation that MeiliSearch must do.
/// It's stored on disk and executed from the lowest to highest Task id.
/// Every time a new task is created it has a higher Task id than the previous one.
/// See also `Job`.
@@ -102,13 +91,13 @@ impl Task {
/// A job is like a volatile priority `Task`.
/// It should be processed as fast as possible and is not stored on disk.
/// This means that when Meilisearch is closed, all your unprocessed jobs will disappear.
/// This means that when MeiliSearch is closed, all your unprocessed jobs will disappear.
#[derive(Debug, derivative::Derivative)]
#[derivative(PartialEq)]
pub enum Job {
Dump {
#[derivative(PartialEq = "ignore")]
ret: oneshot::Sender<Result<oneshot::Sender<()>, IndexResolverError>>,
ret: oneshot::Sender<Result<(), IndexResolverError>>,
path: PathBuf,
},
Snapshot(#[derivative(PartialEq = "ignore")] SnapshotJob),
@@ -176,7 +165,7 @@ mod test {
]
}
pub(super) fn datetime_strategy() -> impl Strategy<Value = OffsetDateTime> {
Just(OffsetDateTime::now_utc())
pub(super) fn datetime_strategy() -> impl Strategy<Value = DateTime<Utc>> {
Just(Utc::now())
}
}

View File

@@ -1,16 +1,19 @@
mod store;
use std::collections::HashSet;
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashSet};
use std::io::{BufWriter, Write};
use std::path::Path;
use std::sync::Arc;
use chrono::Utc;
use heed::{Env, RwTxn};
use log::debug;
use time::OffsetDateTime;
use tokio::sync::RwLock;
use uuid::Uuid;
use super::error::TaskError;
use super::task::{Task, TaskContent, TaskId};
use super::task::{Job, Task, TaskContent, TaskId};
use super::Result;
use crate::index_resolver::IndexUid;
use crate::tasks::task::TaskEvent;
@@ -22,10 +25,9 @@ pub use store::test::MockStore as Store;
pub use store::Store;
/// Defines constraints to be applied when querying for Tasks from the store.
#[derive(Default)]
#[derive(Default, Debug)]
pub struct TaskFilter {
indexes: Option<HashSet<String>>,
filter_fn: Option<Box<dyn Fn(&Task) -> bool + Sync + Send + 'static>>,
}
impl TaskFilter {
@@ -42,28 +44,85 @@ impl TaskFilter {
.get_or_insert_with(Default::default)
.insert(index);
}
}
pub fn filter_fn(&mut self, f: impl Fn(&Task) -> bool + Sync + Send + 'static) {
self.filter_fn.replace(Box::new(f));
/// You can't clone a job because of its volatile nature.
/// If you need to take the `Job` with you, though, you can call the method
/// `Pending::take`. It'll return the `Pending` as-is but leave the original `Empty`.
#[derive(Debug, PartialEq)]
pub enum Pending<T> {
/// A task stored on disk that must be processed.
Task(T),
/// A job always has a higher priority than normal tasks and is not stored on disk.
/// It can be referred to as a `volatile job`.
Job(Job),
}
impl Pending<TaskId> {
/// Makes a copy of the task or takes the content of the volatile job.
pub(crate) fn take(&mut self) -> Self {
match self {
Self::Task(id) => Self::Task(*id),
Self::Job(job) => Self::Job(job.take()),
}
}
}
impl Eq for Pending<TaskId> {}
impl PartialOrd for Pending<TaskId> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match (self, other) {
// In the case of two tasks, we want to return the lowest TaskId first.
(Pending::Task(lhs), Pending::Task(rhs)) => Some(lhs.cmp(rhs).reverse()),
// A job is always better than a task.
(Pending::Task(_), Pending::Job(_)) => Some(Ordering::Less),
(Pending::Job(_), Pending::Task(_)) => Some(Ordering::Greater),
// When there are two jobs we consider them equal.
(Pending::Job(_), Pending::Job(_)) => Some(Ordering::Equal),
}
}
}
impl Pending<Task> {
pub fn get_content_uuid(&self) -> Option<Uuid> {
match self {
Pending::Task(task) => task.get_content_uuid(),
_ => None,
}
}
}
impl Ord for Pending<TaskId> {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
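Because the task comparison is reversed and a job always compares greater than a task, a `BinaryHeap<Pending<TaskId>>` pops jobs first and then tasks in ascending id order, even though `BinaryHeap` is a max-heap. A small illustration, assuming the `Pending`, `Job`, and `TaskId` definitions from this file (and the `Job::Empty` variant used further down):

use std::collections::BinaryHeap;

fn main() {
    let mut queue: BinaryHeap<Pending<TaskId>> = BinaryHeap::new();
    queue.push(Pending::Task(3));
    queue.push(Pending::Task(1));
    queue.push(Pending::Job(Job::Empty));
    queue.push(Pending::Task(2));

    // The volatile job comes out first, then tasks 1, 2, 3 in order.
    assert!(matches!(queue.pop(), Some(Pending::Job(_))));
    assert!(matches!(queue.pop(), Some(Pending::Task(1))));
    assert!(matches!(queue.pop(), Some(Pending::Task(2))));
    assert!(matches!(queue.pop(), Some(Pending::Task(3))));
}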
pub struct TaskStore {
store: Arc<Store>,
pending_queue: Arc<RwLock<BinaryHeap<Pending<TaskId>>>>,
}
impl Clone for TaskStore {
fn clone(&self) -> Self {
Self {
store: self.store.clone(),
pending_queue: self.pending_queue.clone(),
}
}
}
impl TaskStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
let store = Arc::new(Store::new(env)?);
Ok(Self { store })
pub fn new(env: heed::Env) -> Result<Self> {
let mut store = Store::new(env)?;
let unfinished_tasks = store.reset_and_return_unfinished_tasks()?;
let store = Arc::new(store);
Ok(Self {
store,
pending_queue: Arc::new(RwLock::new(unfinished_tasks)),
})
}
pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> {
@@ -72,7 +131,7 @@ impl TaskStore {
let task = tokio::task::spawn_blocking(move || -> Result<Task> {
let mut txn = store.wtxn()?;
let next_task_id = store.next_task_id(&mut txn)?;
let created_at = TaskEvent::Created(OffsetDateTime::now_utc());
let created_at = TaskEvent::Created(Utc::now());
let task = Task {
id: next_task_id,
index_uid,
@@ -87,6 +146,11 @@ impl TaskStore {
})
.await??;
self.pending_queue
.write()
.await
.push(Pending::Task(task.id));
Ok(task)
}
@@ -95,6 +159,35 @@ impl TaskStore {
Ok(())
}
/// Registers an update that applies to multiple indexes.
/// Currently the update is considered a priority.
pub async fn register_job(&self, content: Job) {
debug!("registering a job: {:?}", content);
self.pending_queue.write().await.push(Pending::Job(content));
}
/// Returns the next task to process.
pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> {
let mut pending_queue = self.pending_queue.write().await;
loop {
match pending_queue.peek()? {
Pending::Job(Job::Empty) => drop(pending_queue.pop()),
_ => return Some(pending_queue.peek_mut()?.take()),
}
}
}
/// Returns the next task to process if there is one.
pub async fn get_processing_task(&self) -> Result<Option<Task>> {
match self.peek_pending_task().await {
Some(Pending::Task(tid)) => {
let task = self.get_task(tid, None).await?;
Ok(matches!(task.events.last(), Some(TaskEvent::Processing(_))).then(|| task))
}
_ => Ok(None),
}
}
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
let store = self.store.clone();
let task = tokio::task::spawn_blocking(move || -> Result<_> {
@@ -114,33 +207,17 @@ impl TaskStore {
}
}
pub async fn get_pending_tasks(&self, ids: Vec<TaskId>) -> Result<(Vec<TaskId>, Vec<Task>)> {
let store = self.store.clone();
let tasks = tokio::task::spawn_blocking(move || -> Result<_> {
let mut tasks = Vec::new();
let txn = store.rtxn()?;
for id in ids.iter() {
let task = store
.get(&txn, *id)?
.ok_or(TaskError::UnexistingTask(*id))?;
tasks.push(task);
}
Ok((ids, tasks))
})
.await??;
Ok(tasks)
}
pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> {
pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> {
let store = self.store.clone();
let tasks = tokio::task::spawn_blocking(move || -> Result<_> {
let mut txn = store.wtxn()?;
for task in &tasks {
store.put(&mut txn, task)?;
match task {
Pending::Task(task) => store.put(&mut txn, task)?,
Pending::Job(_) => (),
}
}
txn.commit()?;
@@ -152,6 +229,21 @@ impl TaskStore {
Ok(tasks)
}
/// Delete one task from the queue and remove all `Empty` jobs.
pub async fn delete_pending(&self, to_delete: &Pending<Task>) {
if let Pending::Task(Task { id: pending_id, .. }) = to_delete {
let mut pending_queue = self.pending_queue.write().await;
*pending_queue = std::mem::take(&mut *pending_queue)
.into_iter()
.filter(|pending| match pending {
Pending::Job(Job::Empty) => false,
Pending::Task(id) => pending_id != id,
_ => true,
})
.collect::<BinaryHeap<Pending<TaskId>>>();
}
}
pub async fn list_tasks(
&self,
offset: Option<TaskId>,
@@ -201,7 +293,7 @@ impl TaskStore {
Ok(())
}
pub fn load_dump(src: impl AsRef<Path>, env: Arc<Env>) -> anyhow::Result<()> {
pub fn load_dump(src: impl AsRef<Path>, env: Env) -> anyhow::Result<()> {
// create a dummy update file store, since it is not needed right now.
let store = Self::new(env.clone())?;
@@ -248,7 +340,7 @@ pub mod test {
}
impl MockTaskStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: heed::Env) -> Result<Self> {
Ok(Self::Real(TaskStore::new(env)?))
}
@@ -256,15 +348,23 @@ pub mod test {
Self::Mock(Arc::new(mocker))
}
pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> {
pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> {
match self {
Self::Real(s) => s.update_tasks(tasks).await,
Self::Mock(m) => unsafe {
m.get::<_, Result<Vec<Task>>>("update_tasks").call(tasks)
m.get::<_, Result<Vec<Pending<Task>>>>("update_tasks")
.call(tasks)
},
}
}
pub async fn delete_pending(&self, to_delete: &Pending<Task>) {
match self {
Self::Real(s) => s.delete_pending(to_delete).await,
Self::Mock(m) => unsafe { m.get("delete_pending").call(to_delete) },
}
}
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
match self {
Self::Real(s) => s.get_task(id, filter).await,
@@ -272,13 +372,23 @@ pub mod test {
}
}
pub async fn get_pending_tasks(
&self,
tasks: Vec<TaskId>,
) -> Result<(Vec<TaskId>, Vec<Task>)> {
pub async fn get_processing_task(&self) -> Result<Option<Task>> {
match self {
Self::Real(s) => s.get_pending_tasks(tasks).await,
Self::Mock(m) => unsafe { m.get("get_pending_task").call(tasks) },
Self::Real(s) => s.get_processing_task().await,
Self::Mock(m) => unsafe {
m.get::<_, Result<Option<Task>>>("get_pending_task")
.call(())
},
}
}
pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> {
match self {
Self::Real(s) => s.peek_pending_task().await,
Self::Mock(m) => unsafe {
m.get::<_, Option<Pending<TaskId>>>("peek_pending_task")
.call(())
},
}
}
@@ -290,18 +400,14 @@ pub mod test {
) -> Result<Vec<Task>> {
match self {
Self::Real(s) => s.list_tasks(from, filter, limit).await,
Self::Mock(m) => unsafe { m.get("list_tasks").call((from, filter, limit)) },
Self::Mock(_m) => todo!(),
}
}
pub async fn dump(
&self,
path: impl AsRef<Path>,
update_file_store: UpdateFileStore,
) -> Result<()> {
pub async fn dump(&self, path: &Path, update_file_store: UpdateFileStore) -> Result<()> {
match self {
Self::Real(s) => s.dump(path, update_file_store).await,
Self::Mock(m) => unsafe { m.get("dump").call((path, update_file_store)) },
Self::Mock(_m) => todo!(),
}
}
@@ -319,7 +425,14 @@ pub mod test {
}
}
pub fn load_dump(path: impl AsRef<Path>, env: Arc<Env>) -> anyhow::Result<()> {
pub async fn register_job(&self, content: Job) {
match self {
Self::Real(s) => s.register_job(content).await,
Self::Mock(_m) => todo!(),
}
}
pub fn load_dump(path: impl AsRef<Path>, env: Env) -> anyhow::Result<()> {
TaskStore::load_dump(path, env)
}
}

View File

@@ -10,7 +10,6 @@ use std::convert::TryInto;
use std::mem::size_of;
use std::ops::Range;
use std::result::Result as StdResult;
use std::sync::Arc;
use heed::types::{ByteSlice, OwnedType, SerdeJson, Unit};
use heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn};
@@ -19,7 +18,7 @@ use crate::tasks::task::{Task, TaskId};
use super::super::Result;
use super::TaskFilter;
use super::{Pending, TaskFilter};
enum IndexUidTaskIdCodec {}
@@ -54,26 +53,18 @@ impl<'a> BytesDecode<'a> for IndexUidTaskIdCodec {
}
pub struct Store {
env: Arc<Env>,
env: Env,
uids_task_ids: Database<IndexUidTaskIdCodec, Unit>,
tasks: Database<OwnedType<BEU64>, SerdeJson<Task>>,
}
impl Drop for Store {
fn drop(&mut self) {
if Arc::strong_count(&self.env) == 1 {
self.env.as_ref().clone().prepare_for_closing();
}
}
}
impl Store {
/// Create a new store from the specified `Path`.
/// Be really cautious when calling this function: the returned `Store` may
/// be in an invalid state, with dangling processing tasks.
/// You want to patch all unfinished tasks and put them in your pending
/// queue with the `reset_and_return_unfinished_tasks` method.
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: heed::Env) -> Result<Self> {
let uids_task_ids = env.create_database(Some(UID_TASK_IDS))?;
let tasks = env.create_database(Some(TASKS))?;
@@ -84,6 +75,41 @@ impl Store {
})
}
/// This function should be called *right after* creating the store.
/// It puts back all unfinished updates in the `Created` state. This
/// allows us to re-enqueue an update that didn't have the time to finish
/// when MeiliSearch closed.
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
let mut unfinished_tasks: BinaryHeap<Pending<TaskId>> = BinaryHeap::new();
let mut wtxn = self.wtxn()?;
let mut iter = self.tasks.rev_iter_mut(&mut wtxn)?;
while let Some(entry) = iter.next() {
let entry = entry?;
let (id, mut task): (BEU64, Task) = entry;
// Since all tasks are ordered, we can stop iterating when we encounter the first finished task.
if task.is_finished() {
break;
}
// We only keep the first state. It's supposed to be a `Created` state.
task.events.drain(1..);
unfinished_tasks.push(Pending::Task(id.get()));
// Since we own the id and the task this is a safe operation.
unsafe {
iter.put_current(&id, &task)?;
}
}
drop(iter);
wtxn.commit()?;
Ok(unfinished_tasks)
}
pub fn wtxn(&self) -> Result<RwTxn> {
Ok(self.env.write_txn()?)
}
@@ -131,11 +157,7 @@ impl Store {
.map(|limit| (limit as u64).saturating_add(from))
.unwrap_or(u64::MAX);
let iter: Box<dyn Iterator<Item = StdResult<_, heed::Error>>> = match filter {
Some(
ref filter @ TaskFilter {
indexes: Some(_), ..
},
) => {
Some(filter) => {
let iter = self
.compute_candidates(txn, filter, range)?
.into_iter()
@@ -143,24 +165,15 @@ impl Store {
Box::new(iter)
}
_ => Box::new(
None => Box::new(
self.tasks
.rev_range(txn, &(BEU64::new(range.start)..BEU64::new(range.end)))?
.map(|r| r.map(|(_, t)| t)),
),
};
let apply_filter = |task: &StdResult<_, heed::Error>| match task {
Ok(ref t) => filter
.as_ref()
.and_then(|filter| filter.filter_fn.as_ref())
.map(|f| f(t))
.unwrap_or(true),
Err(_) => true,
};
// Collect 'limit' task if it exists or all of them.
let tasks = iter
.filter(apply_filter)
.take(limit.unwrap_or(usize::MAX))
.try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| {
v.push(task?);
@@ -173,11 +186,11 @@ impl Store {
fn compute_candidates(
&self,
txn: &heed::RoTxn,
filter: &TaskFilter,
filter: TaskFilter,
range: Range<TaskId>,
) -> Result<BinaryHeap<TaskId>> {
let mut candidates = BinaryHeap::new();
if let Some(ref indexes) = filter.indexes {
if let Some(indexes) = filter.indexes {
for index in indexes {
// We need to prefix search the null terminated string to make sure that we only
// get exact matches for the index, and not other uids that would share the same
@@ -244,10 +257,10 @@ pub mod test {
Fake(Mocker),
}
pub struct TmpEnv(TempDir, Arc<heed::Env>);
pub struct TmpEnv(TempDir, heed::Env);
impl TmpEnv {
pub fn env(&self) -> Arc<heed::Env> {
pub fn env(&self) -> heed::Env {
self.1.clone()
}
}
@@ -258,16 +271,23 @@ pub mod test {
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100000);
options.max_dbs(1000);
let env = Arc::new(options.open(tmp.path()).unwrap());
let env = options.open(tmp.path()).unwrap();
TmpEnv(tmp, env)
}
impl MockStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: heed::Env) -> Result<Self> {
Ok(Self::Real(Store::new(env)?))
}
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
match self {
MockStore::Real(index) => index.reset_and_return_unfinished_tasks(),
MockStore::Fake(_) => todo!(),
}
}
pub fn wtxn(&self) -> Result<RwTxn> {
match self {
MockStore::Real(index) => index.wtxn(),
@@ -325,7 +345,7 @@ pub mod test {
let tasks = (0..100)
.map(|_| Task {
id: rand::random(),
index_uid: IndexUid::new_unchecked("test"),
index_uid: IndexUid::new_unchecked("test".to_string()),
content: TaskContent::IndexDeletion,
events: vec![],
})
@@ -356,14 +376,14 @@ pub mod test {
let task_1 = Task {
id: 1,
index_uid: IndexUid::new_unchecked("test"),
index_uid: IndexUid::new_unchecked("test".to_string()),
content: TaskContent::IndexDeletion,
events: vec![],
};
let task_2 = Task {
id: 0,
index_uid: IndexUid::new_unchecked("test1"),
index_uid: IndexUid::new_unchecked("test1".to_string()),
content: TaskContent::IndexDeletion,
events: vec![],
};
@@ -384,13 +404,13 @@ pub mod test {
// same thing but invert the ids
let task_1 = Task {
id: 0,
index_uid: IndexUid::new_unchecked("test"),
index_uid: IndexUid::new_unchecked("test".to_string()),
content: TaskContent::IndexDeletion,
events: vec![],
};
let task_2 = Task {
id: 1,
index_uid: IndexUid::new_unchecked("test1"),
index_uid: IndexUid::new_unchecked("test1".to_string()),
content: TaskContent::IndexDeletion,
events: vec![],
};

View File

@@ -1,108 +0,0 @@
use std::sync::Arc;
use std::time::Duration;
use time::OffsetDateTime;
use tokio::sync::{watch, RwLock};
use tokio::time::interval_at;
use super::batch::Batch;
use super::error::Result;
use super::scheduler::Pending;
use super::{Scheduler, TaskPerformer};
use crate::tasks::task::TaskEvent;
/// The update loop sequentially performs batches of updates by asking the scheduler for a batch,
/// and handing it to the `TaskPerformer`.
pub struct UpdateLoop<P: TaskPerformer> {
scheduler: Arc<RwLock<Scheduler>>,
performer: Arc<P>,
notifier: Option<watch::Receiver<()>>,
debounce_duration: Option<Duration>,
}
impl<P> UpdateLoop<P>
where
P: TaskPerformer + Send + Sync + 'static,
{
pub fn new(
scheduler: Arc<RwLock<Scheduler>>,
performer: Arc<P>,
debounce_duration: Option<Duration>,
notifier: watch::Receiver<()>,
) -> Self {
Self {
scheduler,
performer,
debounce_duration,
notifier: Some(notifier),
}
}
pub async fn run(mut self) {
let mut notifier = self.notifier.take().unwrap();
loop {
if notifier.changed().await.is_err() {
break;
}
if let Some(t) = self.debounce_duration {
let mut interval = interval_at(tokio::time::Instant::now() + t, t);
interval.tick().await;
};
if let Err(e) = self.process_next_batch().await {
log::error!("an error occured while processing an update batch: {}", e);
}
}
}
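The loop above combines a `watch` channel (wake up as soon as a task is registered) with an optional debounce window (wait a beat so that several registrations land in the same batch). A condensed standalone sketch of that wake-up pattern, assuming `tokio` with the `full` feature set (the 100 ms window is illustrative):

use std::time::Duration;
use tokio::sync::watch;
use tokio::time::{interval_at, Instant};

#[tokio::main]
async fn main() {
    let (notifier, mut rx) = watch::channel(());
    tokio::spawn(async move {
        // Somewhere else, a task gets registered.
        let _ = notifier.send(());
    });

    while rx.changed().await.is_ok() {
        // Debounce: give other notifications 100ms to pile up first.
        let window = Duration::from_millis(100);
        let mut interval = interval_at(Instant::now() + window, window);
        interval.tick().await;
        // ...prepare and process the next batch here...
        break; // single iteration, for the example
    }
}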
async fn process_next_batch(&self) -> Result<()> {
let pending = { self.scheduler.write().await.prepare().await? };
match pending {
Pending::Batch(mut batch) => {
for task in &mut batch.tasks {
task.events
.push(TaskEvent::Processing(OffsetDateTime::now_utc()));
}
batch.tasks = {
self.scheduler
.read()
.await
.update_tasks(batch.tasks)
.await?
};
let performer = self.performer.clone();
let batch = performer.process_batch(batch).await;
self.handle_batch_result(batch).await?;
}
Pending::Job(job) => {
let performer = self.performer.clone();
performer.process_job(job).await;
}
Pending::Nothing => (),
}
Ok(())
}
/// Handles the result from a processed batch.
///
/// When a task is processed, the result of the processing is pushed to its event list.
/// `handle_batch_result` makes sure that the new state is saved to the store.
/// The tasks are then removed from the processing queue.
async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> {
let mut scheduler = self.scheduler.write().await;
let tasks = scheduler.update_tasks(batch.tasks).await?;
scheduler.finish();
drop(scheduler);
batch.tasks = tasks;
self.performer.finish(&batch).await;
Ok(())
}
}