Compare commits

..

15 Commits

Author SHA1 Message Date
Louis Dureuil
6d8c88b129 Analytics: change event name 2023-02-21 09:54:14 +01:00
Louis Dureuil
7510e3d684 rename search to multi_search in auth tests 2023-02-21 09:45:39 +01:00
Louis Dureuil
797da246a4 rename search to multi_search in tests 2023-02-21 09:45:17 +01:00
Louis Dureuil
e19bd82202 Rename search to multi_search in test server 2023-02-21 09:44:50 +01:00
Louis Dureuil
9edda9a1e8 Rename search to multi-search 2023-02-21 09:44:23 +01:00
Louis Dureuil
2bffc0b32a multi-search/authentication: Add authentication tests 2023-02-20 13:52:12 +01:00
Louis Dureuil
34ac3c0535 multi-search: Add multi search tests 2023-02-20 13:52:12 +01:00
Louis Dureuil
9ce65b3aaa multi-search: Add test server search method for multi search 2023-02-20 13:52:12 +01:00
Louis Dureuil
809847f138 multi-search: Add search with an array of indexes 2023-02-20 13:52:11 +01:00
Louis Dureuil
e83bc57b18 multi-search: Add basic analytics 2023-02-20 13:52:11 +01:00
Louis Dureuil
b83b67fe12 Authentication: Make allow_index_creation a private field 2023-02-20 13:52:11 +01:00
Louis Dureuil
421666a64d Authentication: Make search_rules optional in AuthFilter 2023-02-20 13:52:11 +01:00
Louis Dureuil
181a86305e Authentication: Directly pass the authfilter to the index scheduler 2023-02-20 13:52:11 +01:00
Louis Dureuil
d1e66f687e Authentication: is_index_authorized takes into account API key indexes even with a tenant token 2023-02-20 13:52:10 +01:00
Louis Dureuil
2b9cea271b Authentication: Refactor authentication check to work for tenant token even without an index in URL
Callers need to manually check `is_index_authorized` when using the route without an index in URL
2023-02-20 13:52:10 +01:00
102 changed files with 1188 additions and 6009 deletions

View File

@@ -1,3 +1,24 @@
# Compile
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM uffizzi/ttyd:alpine
@@ -8,11 +29,19 @@ ENV MEILI_NO_ANALYTICS true
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch
# This directory should hold all the data related to meilisearch so we're going
# to move our PWD in there.
# We don't want to put the meilisearch binary in this directory.
WORKDIR /meili_data
EXPOSE 7700/tcp
ENTRYPOINT ["tini", "--"]

View File

@@ -0,0 +1,28 @@
name: Create issue to upgrade dependencies
on:
schedule:
# Run on the first day of the month, every 3 months
- cron: '0 0 1 */3 *'
workflow_dispatch:
jobs:
create-issue:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Create an issue
uses: actions-ecosystem/action-create-issue@v1
with:
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
title: Upgrade dependencies
body: |
This issue is about updating Meilisearch dependencies:
- [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
- [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
labels: |
dependencies
maintenance

View File

@@ -1,24 +0,0 @@
name: Create issue to upgrade dependencies
on:
schedule:
# Run on the first day of the month, every 3 months
- cron: '0 0 1 */3 *'
workflow_dispatch:
jobs:
create-issue:
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
- name: Create issue
run: |
gh issue create \
--title 'Upgrade dependencies' \
--label 'dependencies,maintenance' \
--body-file $ISSUE_TEMPLATE

View File

@@ -1,4 +1,4 @@
name: Benchmarks (manual)
name: Benchmarks
on:
workflow_dispatch:

View File

@@ -1,5 +1,3 @@
name: Publish binaries to GitHub release
on:
workflow_dispatch:
schedule:
@@ -7,6 +5,8 @@ on:
release:
types: [published]
name: Publish binaries to release
jobs:
check-version:
name: Check the version validity
@@ -54,7 +54,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.5.0
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.5.0
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@@ -123,7 +123,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.5.0
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.5.0
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@@ -1,4 +1,4 @@
name: Publish to APT & Homebrew
name: Publish to APT repository & Homebrew
on:
release:
@@ -35,7 +35,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.5.0
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

View File

@@ -1,5 +1,4 @@
name: Publish images to Docker Hub
---
on:
push:
# Will run for every tag pushed except `latest`
@@ -13,6 +12,8 @@ on:
- cron: '0 23 * * *' # Every day at 11:00pm
workflow_dispatch:
name: Publish tagged images to Docker Hub
jobs:
docker:
runs-on: docker

View File

@@ -1,4 +1,4 @@
name: Benchmarks of indexing (push)
name: Benchmarks indexing (push)
on:
push:

View File

@@ -1,4 +1,4 @@
name: Benchmarks of search for geo (push)
name: Benchmarks search geo (push)
on:
push:

View File

@@ -1,4 +1,4 @@
name: Benchmarks of search for songs (push)
name: Benchmarks search songs (push)
on:
push:

View File

@@ -1,4 +1,4 @@
name: Benchmarks of search for Wikipedia articles (push)
name: Benchmarks search wikipedia articles (push)
on:
push:

View File

@@ -1,4 +1,4 @@
name: Test suite
name: Rust
on:
workflow_dispatch:
@@ -25,35 +25,36 @@ jobs:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
# Disable cache due to disk space issues with Windows workers in CI
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
test-others:
name: Tests on ${{ matrix.os }}
@@ -63,47 +64,19 @@ jobs:
matrix:
os: [macos-12, windows-2022]
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
test-all-features:
name: Tests all features on cron schedule only
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
image: ubuntu:18.04
if: github.event_name == 'schedule'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo build with all features
uses: actions-rs/cargo@v1
with:
command: build
args: --workspace --locked --release --all-features
- name: Run cargo test with all features
uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --locked --release --all-features
- uses: actions/checkout@v3
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
@@ -122,8 +95,8 @@ jobs:
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -141,8 +114,8 @@ jobs:
toolchain: 1.67.0
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -161,8 +134,8 @@ jobs:
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

View File

@@ -14,26 +14,6 @@ jobs:
- name: checkout
uses: actions/checkout@v3
- run: sudo apt-get install musl-tools
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
target: x86_64-unknown-linux-musl
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --target x86_64-unknown-linux-musl --release
- name: Remove dockerignore so we can use the target folder in our docker build
run: rm -f .dockerignore
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
@@ -46,14 +26,14 @@ jobs:
- name: Docker metadata
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v3
with:
images: registry.uffizzi.com/${{ env.UUID_TAG }}
tags: |
type=raw,value=60d
- name: Build Image
uses: docker/build-push-action@v4
uses: docker/build-push-action@v3
with:
context: ./
file: .github/uffizzi/Dockerfile

View File

@@ -82,7 +82,7 @@ jobs:
name: Use Remote Workflow to Preview on Uffizzi
needs:
- cache-compose-file
uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@desc
with:
# If this workflow was triggered by a PR close event, cache-key will be an empty string
# and this reusable workflow will delete the preview deployment.
@@ -95,8 +95,8 @@ jobs:
`meilisearch` command. You should be able to access this instance of meilisearch running in
the preview from the link Meilisearch Endpoint link given below.
Web Terminal Endpoint : <uffizzi-url>
Meilisearch Endpoint : <uffizzi-url>/meilisearch
Web Terminal Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}
Meilisearch Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}/meilisearch
permissions:
contents: read
pull-requests: write

View File

@@ -1,4 +1,4 @@
name: Update Meilisearch version in Cargo.toml
name: Update Meilisearch version in all Cargo.toml files
on:
workflow_dispatch:
@@ -14,7 +14,7 @@ env:
jobs:
update-version-cargo-toml:
name: Update version in Cargo.toml
name: Update version in Cargo.toml files
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -25,7 +25,7 @@ jobs:
override: true
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file
- name: Update Cargo.toml files
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
@@ -35,13 +35,13 @@ jobs:
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
uses: EndBug/add-and-commit@v9
with:
message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml"
message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files"
new_branch: ${{ env.NEW_BRANCH }}
- name: Create the PR pointing to ${{ github.ref_name }}
run: |
gh pr create \
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.' \
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
--label 'skip changelog' \
--milestone $NEW_VERSION \
--base $GITHUB_REF_NAME

331
Cargo.lock generated
View File

@@ -252,7 +252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
dependencies = [
"cfg-if",
"cipher 0.3.0",
"cipher",
"cpufeatures",
"opaque-debug",
]
@@ -410,7 +410,7 @@ checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
[[package]]
name = "benchmarks"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"anyhow",
"bytes",
@@ -517,23 +517,18 @@ dependencies = [
"serde",
]
[[package]]
name = "build_const"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7"
[[package]]
name = "bumpalo"
version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "bus"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80cb4625f5b60155ff1018c9d4ce2e38bf5ae3e5780dfab9fa68bb44a6b751e2"
dependencies = [
"crossbeam-channel",
"num_cpus",
"parking_lot_core",
]
[[package]]
name = "byte-unit"
version = "4.0.18"
@@ -652,17 +647,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chacha20"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fc89c7c5b9e7a02dfe45cd2367bae382f9ed31c61ca8debe5f827c420a2f08"
dependencies = [
"cfg-if",
"cipher 0.4.4",
"cpufeatures",
]
[[package]]
name = "change-detection"
version = "1.2.0"
@@ -675,19 +659,16 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.7.1"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad3d9667a6b4e03813162c22c4d58235c2dc25d580d60837ce29199038341c9"
checksum = "b57f9571f611796ea38e5a9c12e5ce37476f70397b032757f8dfe0c7b9bc5637"
dependencies = [
"cow-utils",
"csv",
"deunicode",
"fst",
"irg-kvariants",
"jieba-rs",
"lindera",
"lindera-ipadic",
"lindera-ko-dic",
"once_cell",
"pinyin",
"serde",
@@ -734,25 +715,20 @@ dependencies = [
"generic-array",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "clap"
version = "3.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
dependencies = [
"atty",
"bitflags",
"clap_derive 3.2.18",
"clap_lex 0.2.4",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
@@ -763,7 +739,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39"
dependencies = [
"bitflags",
"clap_derive",
"clap_derive 4.0.21",
"clap_lex 0.3.0",
"is-terminal",
"once_cell",
@@ -771,6 +747,19 @@ dependencies = [
"termcolor",
]
[[package]]
name = "clap_derive"
version = "3.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_derive"
version = "4.0.21"
@@ -802,24 +791,6 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "cluster"
version = "1.1.0"
dependencies = [
"bus",
"crossbeam",
"ductile",
"log",
"meilisearch-types",
"roaring",
"serde",
"serde_json",
"synchronoise",
"thiserror",
"time",
"uuid 1.3.0",
]
[[package]]
name = "concat-arrays"
version = "0.1.2"
@@ -902,6 +873,15 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
dependencies = [
"build_const",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
@@ -1133,9 +1113,9 @@ dependencies = [
[[package]]
name = "deserr"
version = "0.5.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b"
checksum = "6eee2844f21cf7fb5693aae1fb8f1658127acfdb2fc072167d68a9152584ae64"
dependencies = [
"actix-http",
"actix-utils",
@@ -1150,9 +1130,9 @@ dependencies = [
[[package]]
name = "deserr-internal"
version = "0.5.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a"
checksum = "c27246f8ca9eeba9dd70d614b664dc43b529251ed7bd9e633131010d340da4b9"
dependencies = [
"convert_case 0.5.0",
"proc-macro2",
@@ -1198,24 +1178,9 @@ dependencies = [
"winapi",
]
[[package]]
name = "ductile"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12cde25956886749c891a27249630ae99471f1ba05c4a924aad1a6ffe6932812"
dependencies = [
"anyhow",
"bincode",
"chacha20",
"crossbeam-channel",
"log",
"rand",
"serde",
]
[[package]]
name = "dump"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"anyhow",
"big_s",
@@ -1235,14 +1200,14 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
name = "either"
version = "1.8.1"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
dependencies = [
"serde",
]
@@ -1368,19 +1333,6 @@ dependencies = [
"termcolor",
]
[[package]]
name = "env_logger"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
dependencies = [
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
]
[[package]]
name = "errno"
version = "0.2.8"
@@ -1436,12 +1388,12 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"faux",
"tempfile",
"thiserror",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
@@ -1458,7 +1410,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"insta",
"nom",
@@ -1478,7 +1430,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"criterion",
"serde_json",
@@ -1955,12 +1907,11 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"anyhow",
"big_s",
"bincode",
"cluster",
"crossbeam",
"csv",
"derive_builder",
@@ -1981,7 +1932,7 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
@@ -1995,15 +1946,6 @@ dependencies = [
"serde",
]
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "insta"
version = "1.26.0"
@@ -2045,17 +1987,6 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146"
[[package]]
name = "irg-kvariants"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c73214298363629cf9dbfc93b426808865ee3c121029778cb31b1284104fdf78"
dependencies = [
"csv",
"once_cell",
"serde",
]
[[package]]
name = "is-terminal"
version = "0.4.2"
@@ -2124,7 +2055,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"criterion",
"serde_json",
@@ -2144,15 +2075,6 @@ dependencies = [
"simple_asn1",
]
[[package]]
name = "kanaria"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
dependencies = [
"bitflags",
]
[[package]]
name = "language-tags"
version = "0.3.2"
@@ -2222,15 +2144,14 @@ dependencies = [
[[package]]
name = "lindera"
version = "0.21.1"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f33a20bb9cbf95572b2d2f40d7040c8d8c7ad09ae20e1f6513db6ef2564dfc5"
checksum = "082ca91ac4d1557028ace9bfb8cee1500d156a4574dda93cfcdcf4caaebb9bd7"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding",
"kanaria",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-dictionary",
@@ -2239,27 +2160,24 @@ dependencies = [
"lindera-ko-dic",
"lindera-ko-dic-builder",
"lindera-unidic-builder",
"regex",
"serde",
"serde_json",
"thiserror",
"unicode-blocks",
"unicode-normalization",
"yada",
]
[[package]]
name = "lindera-cc-cedict-builder"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60c3b379251edadbac7a5fdb31e482274e11dae6ab6cc789d0d86cf34369cf49"
checksum = "a8967615a6d85320ec2755e1435c36165467ba01a79026adc3f86dad1b668df3"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap 3.2.23",
"csv",
"encoding",
"env_logger 0.10.0",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
@@ -2267,29 +2185,17 @@ dependencies = [
"yada",
]
[[package]]
name = "lindera-compress"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d0ea3de5625e2381cac94e518d3b56103fde56bc0dce840fe875c1e871b125"
dependencies = [
"anyhow",
"flate2",
"lindera-decompress",
]
[[package]]
name = "lindera-core"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2281747b98fdd46bcc54ce7fdb6870dad9f67ddb3dc086c47b6704f3e1178cd5"
checksum = "0e8ed3cea13f73557a4574a179b1518670a3b70bfdad120521313b03cc89380e"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding_rs",
"log",
"once_cell",
"serde",
"thiserror",
"yada",
@@ -2297,20 +2203,20 @@ dependencies = [
[[package]]
name = "lindera-decompress"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52101bd454754c506305ab897af5ac2ae41fe91e3272c1ff5c6a02a089dfaefd"
checksum = "2badb41828f89cfa6452db0a66da77897c0a04478304de26c8b2b36613e08d43"
dependencies = [
"anyhow",
"flate2",
"lzma-rs",
"serde",
]
[[package]]
name = "lindera-dictionary"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af1c6668848f1d30d216c99093a3ed3fe125c105fa12a4aeed5a1861dc01dd52"
checksum = "e219722c9f56b920c231210e7c25d8b5d35b508e7a2fd69d368916c4b1c926f6"
dependencies = [
"anyhow",
"bincode",
@@ -2320,16 +2226,15 @@ dependencies = [
[[package]]
name = "lindera-ipadic"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "693098007200fa43fd5cdc9ca8740f371327369672ce812cd87a1f6344971e31"
checksum = "2c8e87c8362c724e8188fb7d9b6d184cac15d01369295e9bff7812b630d57e3b"
dependencies = [
"bincode",
"byteorder",
"encoding",
"flate2",
"lindera-core",
"lindera-decompress",
"lindera-ipadic-builder",
"once_cell",
"tar",
@@ -2337,19 +2242,19 @@ dependencies = [
[[package]]
name = "lindera-ipadic-builder"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b6b7240d097a8fc37ee8f90ebff02c4db0ba5325ecb0dacb6da3724596798c9"
checksum = "1439e95852e444a116424086dc64d709c90e8af269ff7d2c2c4020f666f8dfab"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap 3.2.23",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger 0.10.0",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@@ -2359,16 +2264,15 @@ dependencies = [
[[package]]
name = "lindera-ko-dic"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd3c5a4addeb61ca66788a3dd1fd51093e6cd8fea1d997042ada5aa60e8cc5e"
checksum = "cb15f949220da45872d774b7831bb030855ec083435c907499782f8558c8a203"
dependencies = [
"bincode",
"byteorder",
"encoding",
"flate2",
"lindera-core",
"lindera-decompress",
"lindera-ko-dic-builder",
"once_cell",
"tar",
@@ -2376,18 +2280,18 @@ dependencies = [
[[package]]
name = "lindera-ko-dic-builder"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512bb1393a9281e0b13704319d1343b7931416865852d9d7b7c0178431518326"
checksum = "fde5a7352f4754be4f741e90bf4dff38a12a6572ab3880d0cf688e1166b8d82b"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap 3.2.23",
"csv",
"encoding",
"env_logger 0.10.0",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@@ -2396,16 +2300,17 @@ dependencies = [
[[package]]
name = "lindera-unidic-builder"
version = "0.21.0"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f575a27f8ba67c15fe16ebf7d277a0ac04e8c8a0f72670ebc2443da9d41c450"
checksum = "f1451b2ed8a7184a5f815d84f99d358c1d67297305831453dfdc0eb5d08e22b5"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"clap 3.2.23",
"csv",
"encoding",
"env_logger 0.10.0",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
@@ -2494,6 +2399,16 @@ dependencies = [
"syn",
]
[[package]]
name = "lzma-rs"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1"
dependencies = [
"byteorder",
"crc",
]
[[package]]
name = "manifest-dir-macros"
version = "0.1.16"
@@ -2520,7 +2435,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"insta",
"md5",
@@ -2529,7 +2444,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -2548,12 +2463,11 @@ dependencies = [
"bytes",
"cargo_toml",
"clap 4.0.32",
"cluster",
"crossbeam-channel",
"deserr",
"dump",
"either",
"env_logger 0.9.3",
"env_logger",
"file-store",
"flate2",
"fst",
@@ -2609,7 +2523,7 @@ dependencies = [
"tokio-stream",
"toml",
"urlencoding",
"uuid 1.3.0",
"uuid 1.2.2",
"vergen",
"walkdir",
"yaup",
@@ -2618,10 +2532,9 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"base64 0.13.1",
"cluster",
"enum-iterator",
"hmac",
"maplit",
@@ -2633,12 +2546,12 @@ dependencies = [
"sha2",
"thiserror",
"time",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
name = "meilisearch-types"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"actix-web",
"anyhow",
@@ -2663,7 +2576,7 @@ dependencies = [
"thiserror",
"time",
"tokio",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
@@ -2692,7 +2605,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"big_s",
"bimap",
@@ -2738,7 +2651,7 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"uuid 1.3.0",
"uuid 1.2.2",
]
[[package]]
@@ -3046,7 +2959,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "permissive-json-pointer"
version = "1.1.0"
version = "1.0.0"
dependencies = [
"big_s",
"serde_json",
@@ -3576,9 +3489,9 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a"
[[package]]
name = "serde"
version = "1.0.155"
version = "1.0.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8"
checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
dependencies = [
"serde_derive",
]
@@ -3594,9 +3507,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.155"
version = "1.0.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630"
checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
dependencies = [
"proc-macro2",
"quote",
@@ -3605,9 +3518,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.94"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea"
checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
dependencies = [
"indexmap",
"itoa 1.0.5",
@@ -3893,18 +3806,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]]
name = "thiserror"
version = "1.0.39"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.39"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
"proc-macro2",
"quote",
@@ -3913,9 +3826,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.20"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376"
dependencies = [
"itoa 1.0.5",
"serde",
@@ -3931,9 +3844,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
[[package]]
name = "time-macros"
version = "0.2.8"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36"
checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2"
dependencies = [
"time-core",
]
@@ -4099,12 +4012,6 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-blocks"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9de2be6bad6f56ce8373d377e611cbb2265de3a656138065609ce82e217aad70"
[[package]]
name = "unicode-ident"
version = "1.0.6"
@@ -4178,9 +4085,9 @@ dependencies = [
[[package]]
name = "uuid"
version = "1.3.0"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79"
checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
dependencies = [
"getrandom",
"serde",

View File

@@ -9,7 +9,6 @@ members = [
"dump",
"file-store",
"permissive-json-pointer",
"cluster",
"milli",
"filter-parser",
"flatten-serde-json",
@@ -18,7 +17,7 @@ members = [
]
[workspace.package]
version = "1.1.0"
version = "1.0.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@@ -29,7 +29,7 @@ fn bench_formatting(c: &mut criterion::Criterion) {
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
]
).unwrap(), TokenizerBuilder::default().build()),
), TokenizerBuilder::default().build()),
},
];

View File

@@ -1,25 +0,0 @@
[package]
name = "cluster"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
ductile = "0.3.0"
serde = { version = "1.0.155", features = ["derive"] }
serde_json = "1.0.94"
thiserror = "1.0.39"
meilisearch-types = { path = "../meilisearch-types" }
roaring = { version = "0.10.1", features = ["serde"] }
log = "0.4.17"
crossbeam = "0.8.2"
bus = "2.3.0"
time = "0.3.20"
uuid = { version = "1.3.0", features = ["v4"] }
synchronoise = "1.0.1"

View File

@@ -1,148 +0,0 @@
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::TaskId;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
/// Represents a combination of tasks that can all be processed at the same time.
///
/// A batch contains the set of tasks that it represents (accessible through
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
/// be processed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Batch {
/// A task cancelation, together with a snapshot of what was processing when it appeared.
TaskCancelation {
/// The task cancelation itself.
task: TaskId,
/// The date and time at which the previously processing tasks started.
previous_started_at: OffsetDateTime,
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
},
/// A batch made of a single task id.
/// NOTE(review): presumably the id of the task-deletion task itself — confirm.
TaskDeletion(TaskId),
/// A batch made of several task ids.
/// NOTE(review): presumably every pending snapshot-creation task — confirm.
SnapshotCreation(Vec<TaskId>),
/// A batch made of a single task id.
/// NOTE(review): presumably the id of the dump-creation task — confirm.
Dump(TaskId),
/// An operation targeting the content or settings of an index, see [`IndexOperation`].
IndexOperation {
/// The operation to run; it also carries the ids of the batched tasks.
op: IndexOperation,
// NOTE(review): presumably tells the processor that the index does not exist yet
// and must be created before applying `op` — confirm against the scheduler.
must_create_index: bool,
},
/// A single task tied to an index uid and an optional primary key.
IndexCreation {
index_uid: String,
primary_key: Option<String>,
task: TaskId,
},
/// A single task tied to an index uid and an optional primary key.
IndexUpdate {
index_uid: String,
primary_key: Option<String>,
task: TaskId,
},
/// Several tasks tied to the same index uid.
IndexDeletion {
index_uid: String,
/// Every task that is part of this batch.
tasks: Vec<TaskId>,
// NOTE(review): the meaning of this flag is not visible from this file — confirm.
index_has_been_created: bool,
},
/// A single index-swap task.
IndexSwap {
task: TaskId,
},
}
/// A single document-level operation, stored in the `operations` list of the
/// document-related [`IndexOperation`] variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DocumentOperation {
/// NOTE(review): presumably the uuid of the update file holding the documents to add — confirm.
Add(Uuid),
/// NOTE(review): presumably the ids of the documents to delete — confirm.
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
///
/// Every variant names the targeted index through `index_uid` and carries the ids of
/// the tasks it groups (these ids are what [`Batch::ids`] returns).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexOperation {
/// A list of document operations (see [`DocumentOperation`]) applied with a single method.
DocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
// NOTE(review): presumably one count per document addition — confirm.
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
tasks: Vec<TaskId>,
},
/// Several document deletion tasks grouped together.
DocumentDeletion {
index_uid: String,
// One inner vec per document deletion task.
documents: Vec<Vec<String>>,
tasks: Vec<TaskId>,
},
/// Several document clear tasks grouped together.
DocumentClear {
index_uid: String,
tasks: Vec<TaskId>,
},
/// Several settings tasks grouped together.
Settings {
index_uid: String,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
tasks: Vec<TaskId>,
},
/// Document clear tasks grouped with settings tasks; each group keeps its own list of ids.
DocumentClearAndSetting {
index_uid: String,
cleared_tasks: Vec<TaskId>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<TaskId>,
},
/// Document operations grouped with settings tasks; each group keeps its own list of ids.
SettingsAndDocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
// NOTE(review): presumably one count per document addition — confirm.
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
document_import_tasks: Vec<TaskId>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<TaskId>,
},
}
impl Batch {
    /// Returns the ids of every task contained in this batch.
    ///
    /// The ids are copied out of the batch, so the returned iterator does not
    /// borrow `self`. For the index operations that mix two kinds of tasks, the
    /// document-related ids come first, followed by the settings ids.
    pub fn ids(&self) -> impl Iterator<Item = TaskId> {
        let ids: Vec<TaskId> = match self {
            // Batches made of exactly one task.
            Batch::TaskCancelation { task, .. }
            | Batch::TaskDeletion(task)
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
            | Batch::IndexUpdate { task, .. }
            | Batch::IndexSwap { task } => vec![*task],
            // Batches holding a single list of tasks.
            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => tasks.clone(),
            Batch::IndexOperation { op, .. } => match op {
                IndexOperation::DocumentOperation { tasks, .. }
                | IndexOperation::DocumentDeletion { tasks, .. }
                | IndexOperation::DocumentClear { tasks, .. }
                | IndexOperation::Settings { tasks, .. } => tasks.clone(),
                // Operations holding two lists: concatenate them in declaration order.
                IndexOperation::DocumentClearAndSetting {
                    cleared_tasks, settings_tasks, ..
                } => cleared_tasks.iter().chain(settings_tasks).copied().collect(),
                IndexOperation::SettingsAndDocumentOperation {
                    document_import_tasks,
                    settings_tasks,
                    ..
                } => document_import_tasks.iter().chain(settings_tasks).copied().collect(),
            },
        };
        ids.into_iter()
    }
}

View File

@@ -1,276 +0,0 @@
use std::net::ToSocketAddrs;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{atomic, Arc, Mutex, RwLock};
use std::time::Duration;
use bus::{Bus, BusReader};
use crossbeam::channel::{unbounded, Receiver, Sender};
use ductile::{ChannelReceiver, ChannelSender, ChannelServer};
use log::{info, warn};
use meilisearch_types::keys::Key;
use meilisearch_types::tasks::Task;
use synchronoise::SignalEvent;
use uuid::Uuid;
use crate::batch::Batch;
use crate::{ApiKeyOperation, Consistency, FollowerMsg, LeaderMsg};
/// Handle used by the leader node to talk to its followers.
///
/// Cloning the handle is cheap: every field is either a channel end or an `Arc`.
#[derive(Clone)]
pub struct Leader {
// Receives the batch ids sent by the followers through `FollowerMsg::ReadyToCommit`.
task_ready_to_commit: Receiver<u32>,
// Every message sent here is re-broadcast to all the connected followers.
broadcast_to_follower: Sender<LeaderMsg>,
// Both ends of the channel used by `get_keys` (sender side) and `needs_keys` (receiver side).
needs_key_sender: Sender<Sender<Vec<Key>>>,
needs_key_receiver: Receiver<Sender<Vec<Key>>>,
/// Signaled every time a new follower connects.
pub wake_up: Arc<SignalEvent>,
// Number of followers that are connected but have not received a `JoinFromDump` yet.
new_followers: Arc<AtomicUsize>,
// Number of active nodes; initialized to 1 in `new`.
// NOTE(review): presumably so that the leader counts itself — confirm.
active_followers: Arc<AtomicUsize>,
// Id of the last batch sent to the followers, incremented by `starts_batch`.
batch_id: Arc<RwLock<u32>>,
}
impl Leader {
    /// Creates a leader and spawns the background thread accepting followers on `listen_on`.
    ///
    /// When `master_key` is provided, its first 32 bytes (zero-padded if shorter) are used
    /// as the encryption key of the connections with the followers.
    pub fn new(
        listen_on: impl ToSocketAddrs + Send + 'static,
        master_key: Option<String>,
    ) -> Leader {
        let new_followers = Arc::new(AtomicUsize::new(0));
        // Starts at one. NOTE(review): presumably because the leader counts itself;
        // `commit` also starts its tally of ready nodes at one.
        let active_followers = Arc::new(AtomicUsize::new(1));
        let wake_up = Arc::new(SignalEvent::auto(true));
        let (broadcast_to_follower, process_batch_receiver) = unbounded();
        let (task_finished_sender, task_finished_receiver) = unbounded();
        let (needs_key_sender, needs_key_receiver) = unbounded();

        let nf = new_followers.clone();
        let af = active_followers.clone();
        let wu = wake_up.clone();
        std::thread::spawn(move || {
            Self::listener(
                listen_on,
                master_key,
                nf,
                af,
                wu,
                process_batch_receiver,
                task_finished_sender,
            )
        });

        Leader {
            task_ready_to_commit: task_finished_receiver,
            broadcast_to_follower,
            needs_key_sender,
            needs_key_receiver,
            wake_up,
            new_followers,
            active_followers,
            batch_id: Arc::default(),
        }
    }

    /// Returns `true` when at least one follower is connected and still waiting for a dump.
    pub fn has_new_followers(&self) -> bool {
        self.new_followers.load(Ordering::Relaxed) != 0
    }

    /// Takes all the necessary channels to chat with the scheduler and give them
    /// to each new followers
    ///
    /// # Panics
    ///
    /// Panics if the server cannot bind to `listen_on`.
    fn listener(
        listen_on: impl ToSocketAddrs,
        master_key: Option<String>,
        new_followers: Arc<AtomicUsize>,
        active_followers: Arc<AtomicUsize>,
        wake_up: Arc<SignalEvent>,
        broadcast_to_follower: Receiver<LeaderMsg>,
        task_finished: Sender<u32>,
    ) {
        let listener: ChannelServer<LeaderMsg, FollowerMsg> = if let Some(ref master_key) =
            master_key
        {
            let mut enc = [0; 32];
            let master_key = master_key.as_bytes();
            if master_key.len() < 32 {
                warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
            }
            // Copy at most 32 bytes of the key; a shorter key is padded with zeroes.
            enc.iter_mut().zip(master_key).for_each(|(enc, mk)| *enc = *mk);
            info!("Listening with encryption enabled");
            ChannelServer::bind_with_enc(listen_on, enc).unwrap()
        } else {
            ChannelServer::bind(listen_on).unwrap()
        };

        info!("Ready to the receive connections");

        // We're going to broadcast all the batches to all our follower
        let bus: Bus<LeaderMsg> = Bus::new(10);
        let bus = Arc::new(Mutex::new(bus));
        let b = bus.clone();

        // Relay everything the `Leader` handles send to the bus.
        std::thread::spawn(move || loop {
            let msg = broadcast_to_follower.recv().expect("Main thread is dead");
            b.lock().unwrap().broadcast(msg);
        });

        // One dedicated thread per follower connection.
        for (sender, receiver, _addr) in listener {
            let task_finished = task_finished.clone();
            let nf = new_followers.clone();
            let af = active_followers.clone();
            let wu = wake_up.clone();
            let process_batch = bus.lock().unwrap().add_rx();

            std::thread::spawn(move || {
                Self::follower(sender, receiver, nf, af, wu, process_batch, task_finished)
            });
        }
    }

    /// Allow a follower to chat with the scheduler
    ///
    /// The follower first stays in the "new" state until a `JoinFromDump` message is
    /// broadcast, then becomes active: broadcast messages are forwarded to it and its
    /// `ReadyToCommit` answers are forwarded to the leader. Returns once the follower
    /// disconnects.
    fn follower(
        sender: ChannelSender<LeaderMsg>,
        receiver: ChannelReceiver<FollowerMsg>,
        new_followers: Arc<AtomicUsize>,
        active_followers: Arc<AtomicUsize>,
        wake_up: Arc<SignalEvent>,
        mut broadcast_to_follower: BusReader<LeaderMsg>,
        task_finished: Sender<u32>,
    ) {
        let size = new_followers.fetch_add(1, Ordering::Relaxed) + 1;
        wake_up.signal();
        info!("A new follower joined the cluster. {} members.", size);

        // Every message broadcast before the dump is ignored by this follower.
        loop {
            if let msg @ LeaderMsg::JoinFromDump(_) =
                broadcast_to_follower.recv().expect("Main thread died")
            {
                // we exit the new_follower state and become an active follower even though
                // the dump will takes some time to index
                new_followers.fetch_sub(1, Ordering::Relaxed);
                let size = active_followers.fetch_add(1, Ordering::Relaxed) + 1;
                info!("A new follower became active. {} active members.", size);

                sender.send(msg).unwrap();
                break;
            }
        }

        // send messages to the follower
        std::thread::spawn(move || loop {
            let msg = broadcast_to_follower.recv().expect("Main thread died");
            match msg {
                // This follower already joined: dumps meant for newer followers are skipped.
                LeaderMsg::JoinFromDump(_) => (),
                msg => {
                    if sender.send(msg).is_err() {
                        // the follower died, the logging and cluster size update should be done
                        // in the other thread
                        break;
                    }
                }
            }
        });

        // receive messages from the follower
        loop {
            match receiver.recv() {
                Err(_) => break,
                Ok(msg) => match msg {
                    FollowerMsg::ReadyToCommit(id) => {
                        task_finished.send(id).expect("Can't reach the main thread")
                    }
                    // NOTE(review): not implemented yet. Receiving this message panics this
                    // thread, and the active follower count below is then never decremented.
                    FollowerMsg::RegisterNewTask(_) => todo!(),
                },
            }
        }

        // if we exited from the previous loop it means the follower is down and should
        // be removed from the cluster
        let size = active_followers.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
        info!("A follower left the cluster. {} members.", size);
    }

    // ============= Everything related to the setup of the cluster

    /// Broadcasts `dump` so that the followers waiting to join can bootstrap from it.
    pub fn join_me(&self, dump: Vec<u8>) {
        self.broadcast_to_follower
            .send(LeaderMsg::JoinFromDump(dump))
            .expect("Lost the link with the followers");
    }

    // ============= Everything related to the scheduler

    /// Assigns the next batch id to `batch` and broadcasts it to the followers.
    pub fn starts_batch(&self, batch: Batch) {
        let mut batch_id = self.batch_id.write().unwrap();

        info!("Send the batch to process to the followers");
        *batch_id += 1;

        self.broadcast_to_follower
            .send(LeaderMsg::StartBatch { id: *batch_id, batch })
            .expect("Can't reach the cluster");
    }

    /// Blocks until enough nodes are ready to commit the current batch, then tells
    /// every follower to commit it.
    ///
    /// `consistency_level` decides how many nodes are "enough"; the leader always
    /// counts as one ready node. The cluster size is re-read at least once per second
    /// so that the departure of a follower cannot block the commit forever.
    pub fn commit(&self, consistency_level: Consistency) {
        info!("Wait until enough followers are ready to commit a batch");
        let batch_id = self.batch_id.write().unwrap();

        // The leader itself is always ready.
        let mut nodes_ready_to_commit = 1;

        loop {
            let size = self.active_followers.load(atomic::Ordering::Relaxed);

            info!("{nodes_ready_to_commit} nodes are ready to commit for a cluster size of {size}");
            if Self::enough_nodes_ready(consistency_level, nodes_ready_to_commit, size) {
                break;
            }

            // we can't wait forever here because if a node dies the cluster size might get updated while we're stuck
            match self.task_ready_to_commit.recv_timeout(Duration::new(1, 0)) {
                // Answers about another batch are ignored.
                Ok(id) if id == *batch_id => nodes_ready_to_commit += 1,
                _ => continue,
            };
        }

        info!("Tells all the follower to commit");

        self.broadcast_to_follower.send(LeaderMsg::Commit(*batch_id)).unwrap();
    }

    /// Tells whether `ready` nodes out of a cluster of `size` nodes satisfy `consistency_level`.
    ///
    /// Whatever the level, the condition is met as soon as every node is ready. `ready`
    /// can exceed `size` when a follower leaves after having answered, which is why the
    /// comparison is `>=` rather than `==`.
    fn enough_nodes_ready(consistency_level: Consistency, ready: usize, size: usize) -> bool {
        let required = match consistency_level {
            Consistency::One => 1,
            Consistency::Two => 2,
            // A quorum is a strict majority of the cluster.
            Consistency::Quorum => size / 2 + 1,
            Consistency::All => size,
        };
        ready >= size || ready >= required
    }

    /// Broadcasts a new task, and its optional update file, to the followers.
    pub fn register_new_task(&self, task: Task, update_file: Option<Vec<u8>>) {
        info!("Tells all the follower to register a new task");
        self.broadcast_to_follower
            .send(LeaderMsg::RegisterNewTask { task, update_file })
            .expect("Main thread is dead");
    }

    // ============= Everything related to the api-keys

    /// Tells the followers to insert `key`.
    pub fn insert_key(&self, key: Key) {
        self.broadcast_to_follower
            .send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Insert(key)))
            .unwrap()
    }

    /// Tells the followers to delete the key identified by `uuid`.
    pub fn delete_key(&self, uuid: Uuid) {
        self.broadcast_to_follower
            .send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Delete(uuid)))
            .unwrap()
    }

    /// Blocks until someone calls [`Leader::get_keys`] and returns the channel on which
    /// the list of keys must be sent back.
    pub fn needs_keys(&self) -> Sender<Vec<Key>> {
        self.needs_key_receiver.recv().expect("The cluster is dead")
    }

    /// Asks for the list of keys and blocks until the answer arrives.
    ///
    /// The request is served by whoever is blocked in [`Leader::needs_keys`].
    pub fn get_keys(&self) -> Vec<Key> {
        let (send, rcv) = crossbeam::channel::bounded(1);
        self.needs_key_sender.send(send).expect("The cluster is dead");
        rcv.recv().expect("The auth controller is dead")
    }
}

View File

@@ -1,231 +0,0 @@
use std::net::ToSocketAddrs;
use std::str::FromStr;
use std::sync::{Arc, RwLock};
use batch::Batch;
use crossbeam::channel::{unbounded, Receiver, Sender};
use ductile::{connect_channel, connect_channel_with_enc, ChannelReceiver, ChannelSender};
use log::{info, warn};
use meilisearch_types::keys::Key;
use meilisearch_types::tasks::{KindWithContent, Task};
use serde::{Deserialize, Serialize};
pub mod batch;
mod leader;
pub use leader::Leader;
use uuid::Uuid;
/// Errors of the cluster crate.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// A network-level failure; carries no further detail.
#[error("Network issue occured")]
NetworkIssue,
/// A `serde_json` error, converted automatically through `#[from]`.
#[error("Internal error:{0}")]
SerdeJson(#[from] serde_json::Error),
}
/// Messages sent by the leader to its followers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LeaderMsg {
/// A dump to join the cluster
JoinFromDump(Vec<u8>),
/// Starts a new batch
StartBatch { id: u32, batch: Batch },
/// Tell the follower to commit the batch with this id
Commit(u32),
/// Tell the follower to register a new task, along with its optional update file
RegisterNewTask { task: Task, update_file: Option<Vec<u8>> },
/// Tell the follower to apply an operation (insert or delete) on an API key
ApiKeyOperation(ApiKeyOperation),
}
/// Messages sent by a follower to the leader.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FollowerMsg {
// Let the leader knows you're ready to commit the batch with this id
ReadyToCommit(u32),
// NOTE(review): presumably asks the leader to register a task on behalf of the
// follower; the leader side is still a `todo!()` — confirm the intended protocol.
RegisterNewTask(KindWithContent),
}
/// How many nodes must be ready before the leader commits a batch (see `Leader::commit`).
///
/// Serialized in lowercase (`one`, `two`, `quorum`, `all`); the default is [`Consistency::All`].
#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Consistency {
/// At least one node is ready.
One,
/// At least two nodes are ready, or every node when the cluster is smaller.
Two,
/// A quorum of the cluster is ready.
Quorum,
/// Every node of the cluster is ready.
#[default]
All,
}
/// An operation on an API key that the leader replicates to its followers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ApiKeyOperation {
/// Insert the given key.
Insert(Key),
/// Delete the key identified by this uuid.
Delete(Uuid),
}
impl std::fmt::Display for Consistency {
    /// Writes the lowercase name of the consistency level (`one`, `two`, `quorum` or `all`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Consistency::One => "one",
            Consistency::Two => "two",
            Consistency::Quorum => "quorum",
            Consistency::All => "all",
        };
        f.write_str(name)
    }
}
impl FromStr for Consistency {
    type Err = String;

    /// Parses the lowercase name of a consistency level.
    ///
    /// # Errors
    ///
    /// Returns a message listing the accepted values when `s` is none of
    /// `one`, `two`, `quorum` or `all`. The comparison is case-sensitive.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let level = match s {
            "one" => Consistency::One,
            "two" => Consistency::Two,
            "quorum" => Consistency::Quorum,
            "all" => Consistency::All,
            s => {
                return Err(format!(
                    "Unexpected value `{s}`, expected one of `one`, `two`, `quorum`, `all`"
                ))
            }
        };
        Ok(level)
    }
}
/// The role of the current node in the cluster, wrapping the matching handle.
#[derive(Clone)]
pub enum Cluster {
/// This node is the leader.
Leader(Leader),
/// This node is a follower.
Follower(Follower),
}
/// Handle used by a follower node to talk to its leader.
///
/// Incoming leader messages are dispatched by a background thread (`Follower::router`)
/// into one channel per kind of message; the receiving ends are stored here.
#[derive(Clone)]
pub struct Follower {
// Channel to send messages to the leader.
sender: ChannelSender<FollowerMsg>,
// Batches to process, with their id (`LeaderMsg::StartBatch`).
get_batch: Receiver<(u32, Batch)>,
// Ids of the batches the leader asked to commit (`LeaderMsg::Commit`).
must_commit: Receiver<u32>,
// Tasks to register, with their optional update file (`LeaderMsg::RegisterNewTask`).
register_new_task: Receiver<(Task, Option<Vec<u8>>)>,
// API key operations to apply (`LeaderMsg::ApiKeyOperation`).
api_key_op: Receiver<ApiKeyOperation>,
// Id of the last batch returned by `get_new_batch`.
batch_id: Arc<RwLock<u32>>,
}
impl Follower {
    /// Connects to `leader` and blocks until the leader sends the dump to bootstrap from.
    ///
    /// When `master_key` is provided, its first 32 bytes (zero-padded if shorter) are used
    /// as the encryption key of the connection. Returns the follower handle together with
    /// the raw bytes of the dump.
    ///
    /// # Panics
    ///
    /// Panics if the connection cannot be established, if it is lost before the first
    /// message, or if that first message is not a `JoinFromDump`.
    pub fn join(leader: impl ToSocketAddrs, master_key: Option<String>) -> (Follower, Vec<u8>) {
        let (sender, receiver) = match master_key {
            Some(master_key) => {
                let key_bytes = master_key.as_bytes();
                if key_bytes.len() < 32 {
                    warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
                }

                // Copy at most 32 bytes of the key; a shorter key is padded with zeroes.
                let mut enc = [0; 32];
                for (slot, byte) in enc.iter_mut().zip(key_bytes) {
                    *slot = *byte;
                }

                info!("Connecting with encryption enabled");
                connect_channel_with_enc(leader, &enc).unwrap()
            }
            None => connect_channel(leader).unwrap(),
        };

        info!("Connection to the leader established");
        info!("Waiting for the leader to contact us");

        let dump = match receiver.recv().unwrap() {
            LeaderMsg::JoinFromDump(dump) => dump,
            msg => panic!("Received unexpected message {msg:?}"),
        };

        // One channel per kind of message the leader can send after the dump.
        let (get_batch_sender, get_batch) = unbounded();
        let (must_commit_sender, must_commit) = unbounded();
        let (register_task_sender, register_new_task) = unbounded();
        let (create_api_key_sender, api_key_op) = unbounded();

        std::thread::spawn(move || {
            Self::router(
                receiver,
                get_batch_sender,
                must_commit_sender,
                register_task_sender,
                create_api_key_sender,
            );
        });

        let follower = Follower {
            sender,
            get_batch,
            must_commit,
            register_new_task,
            api_key_op,
            batch_id: Arc::default(),
        };
        (follower, dump)
    }

    /// Dispatches every message coming from the leader to the matching channel.
    ///
    /// Never returns; panics when the connection to the leader or one of the
    /// channels is lost.
    fn router(
        receiver: ChannelReceiver<LeaderMsg>,
        get_batch: Sender<(u32, Batch)>,
        must_commit: Sender<u32>,
        register_new_task: Sender<(Task, Option<Vec<u8>>)>,
        api_key_op: Sender<ApiKeyOperation>,
    ) {
        loop {
            let msg = receiver.recv().expect("Lost connection to the leader");
            match msg {
                LeaderMsg::JoinFromDump(_) => {
                    warn!("Received a join from dump msg but Im already running : ignoring the message")
                }
                LeaderMsg::StartBatch { id, batch } => {
                    info!("Starting to process a new batch");
                    get_batch.send((id, batch)).expect("Lost connection to the main thread")
                }
                LeaderMsg::Commit(id) => {
                    info!("Must commit");
                    must_commit.send(id).expect("Lost connection to the main thread")
                }
                LeaderMsg::RegisterNewTask { task, update_file } => {
                    info!("Registered a new task");
                    register_new_task
                        .send((task, update_file))
                        .expect("Lost connection to the main thread")
                }
                LeaderMsg::ApiKeyOperation(key) => {
                    api_key_op.send(key).expect("Lost connection to the main thread")
                }
            }
        }
    }

    /// Blocks until the leader starts a new batch, remembers its id and returns it.
    pub fn get_new_batch(&self) -> Batch {
        info!("Get new batch called");
        let (id, batch) = self.get_batch.recv().expect("Lost connection to the leader");
        info!("Got a new batch");
        *self.batch_id.write().unwrap() = id;
        batch
    }

    /// Tells the leader that the current batch is ready, then blocks until the leader
    /// asks to commit it.
    ///
    /// Commit orders for older batches are skipped.
    ///
    /// # Panics
    ///
    /// Panics if the leader asks to commit a batch newer than the current one.
    pub fn ready_to_commit(&self) {
        info!("I'm ready to commit");
        let batch_id = self.batch_id.read().unwrap();

        self.sender.send(FollowerMsg::ReadyToCommit(*batch_id)).unwrap();

        loop {
            let id = self.must_commit.recv().expect("Lost connection to the leader");
            match id.cmp(&*batch_id) {
                std::cmp::Ordering::Equal => break,
                std::cmp::Ordering::Greater => panic!("We missed a batch"),
                // A commit order for an older batch: keep waiting.
                std::cmp::Ordering::Less => (),
            }
        }
        info!("I got the right to commit");
    }

    /// Blocks until the leader registers a new task and returns it with its optional
    /// update file.
    pub fn get_new_task(&self) -> (Task, Option<Vec<u8>>) {
        self.register_new_task.recv().expect("Lost connection to the leader")
    }

    /// Blocks until the leader sends an API key operation and returns it.
    pub fn api_key_operation(&self) -> ApiKeyOperation {
        info!("Creating a new api key");
        self.api_key_op.recv().expect("Lost connection to the leader")
    }
}

View File

@@ -118,13 +118,3 @@ ssl_resumption = false
ssl_tickets = false
# Activates SSL tickets.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
#############################
### Experimental features ###
#############################
experimental_enable_metrics = false
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.

View File

@@ -116,20 +116,10 @@ impl FileStore {
/// List the Uuids of the files in the FileStore
pub fn all_uuids(&self) -> Result<impl Iterator<Item = Result<Uuid>>> {
Ok(self.path.read_dir()?.filter_map(|entry| {
let file_name = match entry {
Ok(entry) => entry.file_name(),
Err(e) => return Some(Err(e.into())),
};
let file_name = match file_name.to_str() {
Some(file_name) => file_name,
None => return Some(Err(Error::CouldNotParseFileNameAsUtf8)),
};
if file_name.starts_with('.') {
None
} else {
Some(Uuid::from_str(file_name).map_err(|e| e.into()))
}
Ok(self.path.read_dir()?.map(|entry| {
Ok(Uuid::from_str(
entry?.file_name().to_str().ok_or(Error::CouldNotParseFileNameAsUtf8)?,
)?)
}))
}
}
@@ -145,34 +135,3 @@ impl File {
Ok(())
}
}
#[cfg(test)]
mod test {
use std::io::Write;
use tempfile::TempDir;
use super::*;
/// Checks that `FileStore::all_uuids` only lists the update files that were persisted.
#[test]
fn all_uuids() {
let dir = TempDir::new().unwrap();
let fs = FileStore::new(dir.path()).unwrap();
// A first update file, written then persisted: it must be listed.
let (uuid, mut file) = fs.new_update().unwrap();
file.write_all(b"Hello world").unwrap();
file.persist().unwrap();
let all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(all_uuids, vec![uuid]);
// A second update file, created but not persisted yet: it must not be listed.
let (uuid2, file) = fs.new_update().unwrap();
let all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(all_uuids, vec![uuid]);
// Once persisted, the second file is listed as well.
file.persist().unwrap();
let mut all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
// Both sides are sorted before the comparison so the listing order does not matter.
all_uuids.sort();
let mut expected = vec![uuid, uuid2];
expected.sort();
assert_eq!(all_uuids, expected);
}
}

View File

@@ -13,8 +13,6 @@ license.workspace = true
[dependencies]
anyhow = "1.0.64"
bincode = "1.3.3"
cluster = { path = "../cluster" }
crossbeam = "0.8.2"
csv = "1.1.6"
derive_builder = "0.11.2"
dump = { path = "../dump" }

View File

@@ -88,11 +88,11 @@ pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentOperation {
DocumentImport {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
import_ids: Vec<TaskId>,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
@@ -102,12 +102,12 @@ pub enum BatchKind {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentOperation {
SettingsAndDocumentImport {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
import_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
@@ -131,9 +131,9 @@ impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentOperation { allow_index_creation, .. }
BatchKind::DocumentImport { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
@@ -141,8 +141,8 @@ impl BatchKind {
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentOperation { primary_key, .. }
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
BatchKind::DocumentImport { primary_key, .. }
| BatchKind::SettingsAndDocumentImport { primary_key, .. } => {
Some(primary_key.as_deref())
}
_ => None,
@@ -173,22 +173,22 @@ impl BatchKind {
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentOperation {
Continue(BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key: pk,
operation_ids: vec![task_id],
import_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { method, allow_index_creation, primary_key } => (
Break(BatchKind::DocumentOperation {
Break(BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key,
operation_ids: vec![task_id],
import_ids: vec![task_id],
}),
allow_index_creation,
),
@@ -249,7 +249,7 @@ impl BatchKind {
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
@@ -258,7 +258,7 @@ impl BatchKind {
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
| BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
K::IndexDeletion,
) => {
ids.push(id);
@@ -278,108 +278,63 @@ impl BatchKind {
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids },
K::DocumentClear,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: ReplaceDocuments,
allow_index_creation,
operation_ids,
import_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: UpdateDocuments,
allow_index_creation,
primary_key: pk,
operation_ids,
import_ids,
})
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentImport { .. },
this @ BatchKind::DocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
BatchKind::DocumentImport { method, allow_index_creation, primary_key, import_ids },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentOperation {
) => Continue(BatchKind::SettingsAndDocumentImport {
settings_ids: vec![id],
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
@@ -448,60 +403,60 @@ impl BatchKind {
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation, primary_key: _ },
K::DocumentClear,
) => {
operation_ids.push(id);
other.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other: operation_ids,
other,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation, primary_key: _},
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
import_ids,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
import_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branches
(
this @ BatchKind::SettingsAndDocumentOperation { .. },
this @ BatchKind::SettingsAndDocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation,primary_key, import_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
})
}
(
@@ -633,29 +588,29 @@ mod tests {
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// if it doesn't exist.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// if it doesn't exist.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
@@ -673,83 +628,56 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// addition, updates and deletion can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn document_addition_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
// We ensure this kind of batch doesn't batch with forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
}
#[test]
@@ -861,73 +789,67 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch with what
// follows because we first need to process the erroneous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
}
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
}
}

View File

@@ -22,14 +22,14 @@ use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::BufWriter;
use crossbeam::utils::Backoff;
use dump::{DumpWriter, IndexMetadata};
use dump::IndexMetadata;
use log::{debug, error, info};
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@@ -42,14 +42,14 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Cluster, Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
/// A batch contains the set of tasks that it represents (accessible through
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
/// be processed.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub(crate) enum Batch {
TaskCancelation {
/// The task cancelation itself.
@@ -86,21 +86,15 @@ pub(crate) enum Batch {
},
}
/// A single document-level operation carried by a document batch, built from
/// one task when the batch is created.
#[derive(Debug, Clone)]
pub(crate) enum DocumentOperation {
/// Add documents; holds the UUID of the task's content file.
Add(Uuid),
/// Delete documents; holds the ids of the documents to delete.
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentOperation {
DocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
tasks: Vec<Task>,
},
DocumentDeletion {
@@ -127,13 +121,13 @@ pub(crate) enum IndexOperation {
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentOperation {
SettingsAndDocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
@@ -155,13 +149,13 @@ impl Batch {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
IndexOperation::DocumentImport { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
document_import_tasks: tasks,
settings_tasks: other,
..
@@ -175,33 +169,17 @@ impl Batch {
Batch::IndexSwap { task } => vec![task.uid],
}
}
/// Return the index UID associated with this batch.
///
/// Returns `None` for batches that do not carry a single index UID
/// (task cancelation, task deletion, snapshot creation, dump and index swap).
/// For every other batch, returns the UID of the index it targets.
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
// These batches do not target one specific index.
TaskCancelation { .. }
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
// The UID is stored on the wrapped index operation.
IndexOperation { op, .. } => Some(op.index_uid()),
// These variants store the UID directly.
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
IndexOperation::DocumentImport { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
| IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid,
}
}
}
@@ -228,22 +206,17 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
BatchKind::DocumentImport { method, import_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, import_ids)?;
let primary_key = match &tasks[0].kind {
KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => {
primary_key.clone()
}
_ => unreachable!(),
};
let mut documents_counts = Vec::new();
let mut operations = Vec::new();
let mut content_files = Vec::new();
for task in tasks.iter() {
match task.kind {
@@ -253,23 +226,19 @@ impl IndexScheduler {
..
} => {
documents_counts.push(documents_count);
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
content_files.push(content_file);
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
op: IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks,
},
must_create_index,
@@ -353,12 +322,12 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentOperation {
BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
@@ -370,11 +339,11 @@ impl IndexScheduler {
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentOperation {
BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
},
must_create_index,
)?;
@@ -383,10 +352,10 @@ impl IndexScheduler {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
primary_key,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
..
},
@@ -397,12 +366,12 @@ impl IndexScheduler {
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentOperation {
op: IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@@ -587,12 +556,6 @@ impl IndexScheduler {
_ => unreachable!(),
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
// We must only remove the content files if the transaction is successfully committed
// and if errors occur when we are deleting files we must do our best to delete
// everything. We do not return the encountered errors when deleting the content
@@ -636,13 +599,6 @@ impl IndexScheduler {
}
_ => unreachable!(),
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
wtxn.commit()?;
Ok(vec![task])
}
@@ -737,9 +693,95 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::Dump(mut task) => {
// TODO: It would be better to use the started_at from the task instead of generating a new one
let started_at = OffsetDateTime::now_utc();
let dump = self.create_dump(&task, &started_at)?;
let (keys, instance_uid) =
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
(keys, instance_uid)
} else {
unreachable!();
};
let dump = dump::DumpWriter::new(*instance_uid)?;
// 1. dump the keys
let mut dump_keys = dump.create_keys()?;
for key in keys {
dump_keys.push_key(key)?;
}
dump_keys.flush()?;
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if t.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
t.status = Status::Succeeded;
t.started_at = Some(started_at);
t.finished_at = Some(finished_at);
}
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (mut cursor, documents_batch_index) =
reader.into_cursor_and_fields_index();
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file.push_document(&obkv_to_object(
&doc,
&documents_batch_index,
)?)?;
}
dump_content_file.flush()?;
}
}
}
dump_tasks.flush()?;
// 3. Dump the indexes
for (uid, index) in self.index_mapper.indexes(&rtxn)? {
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.clone(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(&uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
index_dumper.settings(&settings)?;
}
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
@@ -767,13 +809,6 @@ impl IndexScheduler {
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
index_wtxn.commit()?;
Ok(tasks)
@@ -872,13 +907,6 @@ impl IndexScheduler {
for swap in swaps {
self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
}
match &self.cluster {
Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
None => (),
}
wtxn.commit()?;
task.status = Status::Succeeded;
Ok(vec![task])
@@ -886,99 +914,6 @@ impl IndexScheduler {
}
}
/// Builds a dump of the whole instance on behalf of the `DumpCreation` task `task`.
///
/// The dump is written in three steps: the API keys carried by the task, the task queue
/// (together with the pending document payloads of tasks that are still enqueued) and
/// finally every index (documents, then settings).
///
/// `started_at` is the date recorded as the start date of `task` inside the dump.
///
/// # Panics
///
/// Panics if `task` is not a `KindWithContent::DumpCreation` task.
pub(crate) fn create_dump(
    &self,
    task: &Task,
    started_at: &OffsetDateTime,
) -> Result<DumpWriter> {
    let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind else {
        unreachable!();
    };
    let dump = dump::DumpWriter::new(*instance_uid)?;

    // 1. dump the keys
    let mut keys_writer = dump.create_keys()?;
    for key in keys {
        keys_writer.push_key(key)?;
    }
    keys_writer.flush()?;

    let rtxn = self.env.read_txn()?;

    // 2. dump the tasks
    let mut tasks_writer = dump.create_tasks_queue()?;
    for entry in self.all_tasks.iter(&rtxn)? {
        let (_, mut stored) = entry?;
        // Both values are captured before the task is (possibly) rewritten below.
        let status = stored.status;
        let content_uuid = stored.content_uuid();

        // The dump task itself must be stored as finished, otherwise importing the dump
        // would make it loop over itself indefinitely.
        if stored.uid == task.uid {
            let finished_at = OffsetDateTime::now_utc();

            // The dates are faked: we cannot know yet whether everything will go well,
            // but the task must be dumped as finished and successful.
            // If something fails, everything is set appropriately in the end.
            stored.status = Status::Succeeded;
            stored.started_at = Some(*started_at);
            stored.finished_at = Some(finished_at);
        }

        let mut payload_writer = tasks_writer.push_task(&stored.into())?;

        // 2.1. Dump the content file associated with the task, if there is one and the
        //      task is not finished yet.
        match content_uuid {
            Some(content_uuid) if status == Status::Enqueued => {
                let content_file = self.file_store.get_update(content_uuid)?;
                let reader = DocumentsBatchReader::from_reader(content_file)
                    .map_err(milli::Error::from)?;
                let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();

                while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? {
                    let object = obkv_to_object(&doc, &fields_index)?;
                    payload_writer.push_document(&object)?;
                }
                payload_writer.flush()?;
            }
            _ => {}
        }
    }
    tasks_writer.flush()?;

    // 3. Dump the indexes
    self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
        let index_rtxn = index.read_txn()?;
        let metadata = IndexMetadata {
            uid: uid.to_owned(),
            primary_key: index.primary_key(&index_rtxn)?.map(String::from),
            created_at: index.created_at(&index_rtxn)?,
            updated_at: index.updated_at(&index_rtxn)?,
        };
        let mut index_writer = dump.create_index(uid, &metadata)?;

        let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

        // 3.1. Dump the documents
        for entry in index.all_documents(&index_rtxn)? {
            let (_id, obkv) = entry?;
            let document = milli::obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
            index_writer.push_document(&document)?;
        }

        // 3.2. Dump the settings
        let settings = meilisearch_types::settings::settings(index, &index_rtxn)?;
        index_writer.settings(&settings)?;
        Ok(())
    })?;

    Ok(dump)
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
// 1. Verify that both lhs and rhs are existing indexes
@@ -1052,12 +987,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid: _,
primary_key,
method,
documents_counts: _,
operations,
documents_counts,
content_files,
mut tasks,
} => {
let mut primary_key_has_been_set = false;
@@ -1102,82 +1037,26 @@ impl IndexScheduler {
|| must_stop_processing.get(),
)?;
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let mut results = Vec::new();
for content_uuid in content_files.into_iter() {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
..
}) = task.details
{
received_documents
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
let user_result = match user_result {
Ok(count) => Ok(DocumentAdditionResult {
indexed_documents: count,
number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details.
}),
Err(e) => Err(milli::Error::from(e)),
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
{
provided_ids
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
results.push(user_result);
}
if !tasks.iter().all(|res| res.error.is_some()) {
if results.iter().any(|res| res.is_ok()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
@@ -1192,6 +1071,29 @@ impl IndexScheduler {
)?;
}
for (task, (ret, count)) in
tasks.iter_mut().zip(results.into_iter().zip(documents_counts))
{
match ret {
Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: number_of_documents,
indexed_documents: Some(indexed_documents),
});
}
Err(error) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: count,
// if there was an error we indexed 0 documents.
indexed_documents: Some(0),
});
task.error = Some(error.into())
}
}
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
@@ -1234,12 +1136,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@@ -1257,12 +1159,12 @@ impl IndexScheduler {
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
},
)?;
@@ -1409,274 +1311,4 @@ impl IndexScheduler {
Ok(content_files_to_delete)
}
/// Rebuilds a local [`Batch`] from the task identifiers carried by a cluster batch.
///
/// Before converting, the function blocks until every task id returned by `batch.ids()`
/// can be read from `all_tasks`. Each retry uses a fresh read transaction, because a
/// transaction opened earlier cannot observe a later write.
///
/// # Errors
///
/// Returns an error if a read transaction cannot be opened, or if reading the task queue
/// or fetching the tasks fails.
pub(crate) fn get_batch_from_cluster_batch(
    &self,
    batch: cluster::batch::Batch,
) -> Result<Batch> {
    use cluster::batch::Batch as CBatch;

    let mut rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;

    for id in batch.ids() {
        let backoff = Backoff::new();
        let id = BEU32::new(id);

        loop {
            if self.all_tasks.get(&rtxn, &id)?.is_some() {
                info!("Found the task_id");
                break;
            }
            info!("The task is not present in the task queue, we wait");

            // we need to drop the txn to make a write visible
            drop(rtxn);
            // We are waiting for *another thread* to register the task: this is a blocking
            // wait, so back off with `snooze` (which ends up yielding to the OS scheduler)
            // rather than `spin`, which would keep a core busy for the whole wait.
            backoff.snooze();
            rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
        }
    }

    // NOTE(review): the `[0]` accesses below rely on `get_existing_tasks` returning the
    // task that the loop above just waited for — confirm against its implementation.
    Ok(match batch {
        CBatch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
            Batch::TaskCancelation {
                task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
                previous_started_at,
                previous_processing_tasks,
            }
        }
        CBatch::TaskDeletion(task) => {
            Batch::TaskDeletion(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
        }
        CBatch::SnapshotCreation(tasks) => {
            Batch::SnapshotCreation(self.get_existing_tasks(&rtxn, tasks)?)
        }
        CBatch::Dump(task) => {
            Batch::Dump(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
        }
        CBatch::IndexOperation { op, must_create_index } => Batch::IndexOperation {
            op: self.get_index_op_from_cluster_index_op(&rtxn, op)?,
            must_create_index,
        },
        CBatch::IndexCreation { index_uid, primary_key, task } => Batch::IndexCreation {
            index_uid,
            primary_key,
            task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
        },
        CBatch::IndexUpdate { index_uid, primary_key, task } => Batch::IndexUpdate {
            index_uid,
            primary_key,
            task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
        },
        CBatch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
            Batch::IndexDeletion {
                index_uid,
                tasks: self.get_existing_tasks(&rtxn, tasks)?,
                index_has_been_created,
            }
        }
        CBatch::IndexSwap { task } => {
            Batch::IndexSwap { task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone() }
        }
    })
}
/// Converts a cluster-side index operation into a local [`IndexOperation`].
///
/// The cluster representation only carries task identifiers; every list of identifiers
/// is resolved into tasks with `get_existing_tasks` using `rtxn`. All other fields are
/// forwarded unchanged, and document operations are converted with their `Into` impl.
///
/// # Errors
///
/// Returns the first error raised by `get_existing_tasks`.
pub(crate) fn get_index_op_from_cluster_index_op(
    &self,
    rtxn: &RoTxn,
    op: cluster::batch::IndexOperation,
) -> Result<IndexOperation> {
    use cluster::batch::IndexOperation as ClusterOp;

    let local_op = match op {
        ClusterOp::DocumentOperation {
            index_uid,
            primary_key,
            method,
            documents_counts,
            operations,
            tasks,
        } => {
            let tasks = self.get_existing_tasks(rtxn, tasks)?;
            IndexOperation::DocumentOperation {
                index_uid,
                primary_key,
                method,
                documents_counts,
                operations: operations.into_iter().map(Into::into).collect(),
                tasks,
            }
        }
        ClusterOp::DocumentDeletion { index_uid, documents, tasks } => {
            let tasks = self.get_existing_tasks(rtxn, tasks)?;
            IndexOperation::DocumentDeletion { index_uid, documents, tasks }
        }
        ClusterOp::DocumentClear { index_uid, tasks } => {
            let tasks = self.get_existing_tasks(rtxn, tasks)?;
            IndexOperation::DocumentClear { index_uid, tasks }
        }
        ClusterOp::Settings { index_uid, settings, tasks } => {
            let tasks = self.get_existing_tasks(rtxn, tasks)?;
            IndexOperation::Settings { index_uid, settings, tasks }
        }
        ClusterOp::DocumentClearAndSetting {
            index_uid,
            cleared_tasks,
            settings,
            settings_tasks,
        } => {
            // Resolution order is kept: cleared tasks first, settings tasks second.
            let cleared_tasks = self.get_existing_tasks(rtxn, cleared_tasks)?;
            let settings_tasks = self.get_existing_tasks(rtxn, settings_tasks)?;
            IndexOperation::DocumentClearAndSetting {
                index_uid,
                cleared_tasks,
                settings,
                settings_tasks,
            }
        }
        ClusterOp::SettingsAndDocumentOperation {
            index_uid,
            primary_key,
            method,
            documents_counts,
            operations,
            document_import_tasks,
            settings,
            settings_tasks,
        } => {
            // Resolution order is kept: import tasks first, settings tasks second.
            let document_import_tasks = self.get_existing_tasks(rtxn, document_import_tasks)?;
            let settings_tasks = self.get_existing_tasks(rtxn, settings_tasks)?;
            IndexOperation::SettingsAndDocumentOperation {
                index_uid,
                primary_key,
                method,
                documents_counts,
                operations: operations.into_iter().map(Into::into).collect(),
                document_import_tasks,
                settings,
                settings_tasks,
            }
        }
    };

    Ok(local_op)
}
}
impl From<Batch> for cluster::batch::Batch {
fn from(batch: Batch) -> Self {
use cluster::batch::Batch as CBatch;
match batch {
Batch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
CBatch::TaskCancelation {
task: task.uid,
previous_started_at,
previous_processing_tasks,
}
}
Batch::TaskDeletion(task) => CBatch::TaskDeletion(task.uid),
Batch::SnapshotCreation(task) => {
CBatch::SnapshotCreation(task.into_iter().map(|task| task.uid).collect())
}
Batch::Dump(task) => CBatch::Dump(task.uid),
Batch::IndexOperation { op, must_create_index } => {
CBatch::IndexOperation { op: op.into(), must_create_index }
}
Batch::IndexCreation { index_uid, primary_key, task } => {
CBatch::IndexCreation { index_uid, primary_key, task: task.uid }
}
Batch::IndexUpdate { index_uid, primary_key, task } => {
CBatch::IndexUpdate { index_uid, primary_key, task: task.uid }
}
Batch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
CBatch::IndexDeletion {
index_uid,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
index_has_been_created,
}
}
Batch::IndexSwap { task } => CBatch::IndexSwap { task: task.uid },
}
}
}
impl From<IndexOperation> for cluster::batch::IndexOperation {
fn from(op: IndexOperation) -> Self {
use cluster::batch::IndexOperation as COp;
match op {
IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
tasks,
} => COp::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::DocumentDeletion { index_uid, documents, tasks } => {
COp::DocumentDeletion {
index_uid,
documents,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
}
}
IndexOperation::DocumentClear { index_uid, tasks } => COp::DocumentClear {
index_uid,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::Settings { index_uid, settings, tasks } => COp::Settings {
index_uid,
settings,
tasks: tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks,
settings,
settings_tasks,
} => COp::DocumentClearAndSetting {
index_uid,
cleared_tasks: cleared_tasks.into_iter().map(|task| task.uid).collect(),
settings,
settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
},
IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations,
document_import_tasks,
settings,
settings_tasks,
} => COp::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
operations: operations.into_iter().map(|op| op.into()).collect(),
document_import_tasks: document_import_tasks
.into_iter()
.map(|task| task.uid)
.collect(),
settings,
settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
},
}
}
}
/// Variant-for-variant conversion of a local document operation into the cluster one.
impl From<DocumentOperation> for cluster::batch::DocumentOperation {
    fn from(op: DocumentOperation) -> Self {
        match op {
            DocumentOperation::Add(content_uuid) => Self::Add(content_uuid),
            DocumentOperation::Delete(document_ids) => Self::Delete(document_ids),
        }
    }
}
/// Variant-for-variant conversion of a cluster document operation into the local one.
impl From<cluster::batch::DocumentOperation> for DocumentOperation {
    fn from(op: cluster::batch::DocumentOperation) -> Self {
        use cluster::batch::DocumentOperation as ClusterOp;

        match op {
            ClusterOp::Add(content_uuid) => Self::Add(content_uuid),
            ClusterOp::Delete(document_ids) => Self::Delete(document_ids),
        }
    }
}

View File

@@ -0,0 +1,250 @@
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
/// Name of the database, created in `IndexMapper::new`, that maps index names to index uuids.
const INDEX_MAPPING: &str = "index-mapping";
/// Structure managing meilisearch's indexes.
///
/// It is responsible for:
/// 1. Creating new indexes
/// 2. Opening indexes and storing references to these opened indexes
/// 3. Accessing indexes through their uuid
/// 4. Mapping a user-defined name to each index uuid.
///
/// Cloning an `IndexMapper` is cheap for the in-memory state: the map of opened indexes
/// and the indexer configuration are both behind an `Arc` and are therefore shared
/// between the clones.
#[derive(Clone)]
pub struct IndexMapper {
/// Keep track of the opened indexes. Used mainly by the index resolver.
index_map: Arc<RwLock<HashMap<Uuid, IndexStatus>>>,
/// Map an index name with an index uuid currently available on disk.
pub(crate) index_mapping: Database<Str, UuidCodec>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
/// Map size given to every index environment when it is created or opened
/// (after being passed through `clamp_to_page_size`).
index_size: usize,
/// Indexer configuration shared by all the clones of this mapper.
// NOTE(review): this struct only stores and exposes it; presumably consumed by the
// indexing code elsewhere in the crate — confirm.
pub indexer_config: Arc<IndexerConfig>,
}
/// Whether the index is available for use or is forbidden to be inserted back in the index map
// `Available` carries a whole `Index` while `BeingDeleted` carries nothing; the lint about
// the size difference between variants is silenced rather than boxing the index.
// NOTE(review): presumably a deliberate choice to avoid an extra indirection — confirm.
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// You can use the index without worrying about anything.
Available(Index),
}
impl IndexMapper {
/// Creates an index mapper with an empty in-memory map of opened indexes.
///
/// The `index-mapping` database is created in `env`; `base_path` is the folder that
/// holds one sub-folder per index and `index_size` the map size used for every index.
///
/// # Errors
///
/// Returns an error if the mapping database cannot be created.
pub fn new(
    env: &Env,
    base_path: PathBuf,
    index_size: usize,
    indexer_config: IndexerConfig,
) -> Result<Self> {
    let index_mapping = env.create_database(Some(INDEX_MAPPING))?;
    let indexer_config = Arc::new(indexer_config);

    Ok(Self { index_map: Arc::default(), index_mapping, base_path, index_size, indexer_config })
}
/// Create or open the index stored at `path`, using `self.index_size` as its map size.
/// The path *must* exist or an error will be thrown.
///
/// When `date` is provided, its two values are used as the creation and update dates
/// of the index.
fn create_or_open_index(
    &self,
    path: &Path,
    date: Option<(OffsetDateTime, OffsetDateTime)>,
) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    options.map_size(clamp_to_page_size(self.index_size));
    options.max_readers(1024);

    let index = match date {
        Some((created, updated)) => {
            Index::new_with_creation_dates(options, path, created, updated)?
        }
        None => Index::new(options, path)?,
    };
    Ok(index)
}
/// Get or create the index.
///
/// If `name` is already mapped to an index, that index is returned. Otherwise a new uuid
/// is generated, registered under `name`, and a fresh index is created in
/// `base_path/<uuid>`, optionally with the given creation and update dates.
///
/// The write transaction is consumed: it is committed on both successful paths, and
/// dropped without being committed when an error is returned.
///
/// # Panics
///
/// Panics if the freshly generated uuid is already marked as `BeingDeleted` in the
/// in-memory map, or if the lock protecting that map is poisoned.
pub fn create_index(
&self,
mut wtxn: RwTxn,
name: &str,
date: Option<(OffsetDateTime, OffsetDateTime)>,
) -> Result<Index> {
match self.index(&wtxn, name) {
Ok(index) => {
wtxn.commit()?;
Ok(index)
}
Err(Error::IndexNotFound(_)) => {
let uuid = Uuid::new_v4();
self.index_mapping.put(&mut wtxn, name, &uuid)?;
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date)?;
// The mapping is only persisted once the index exists on disk.
wtxn.commit()?;
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
if let Some(BeingDeleted) =
self.index_map.write().unwrap().insert(uuid, Available(index.clone()))
{
panic!("Uuid v4 conflict.");
}
Ok(index)
}
// Any other error is forwarded untouched.
error => error,
}
}
/// Removes the index from the mapping table and the in-memory index map
/// but keeps the associated tasks.
///
/// The mapping removal is committed synchronously. The removal of the files on disk
/// happens in a background thread named `index_deleter`; a failure to delete them is
/// only logged.
///
/// # Errors
///
/// Returns `Error::IndexNotFound` if `name` is not mapped to any index, or the
/// underlying error if the mapping table cannot be read, updated or committed.
///
/// # Panics
///
/// Panics if the lock protecting the index map is poisoned or if the deletion thread
/// cannot be spawned.
pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(&wtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// Once we retrieved the UUID of the index we remove it from the mapping table.
assert!(self.index_mapping.delete(&mut wtxn, name)?);
wtxn.commit()?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
// Release the lock before spawning the thread so the map is not locked while we wait.
drop(lock);
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
thread::Builder::new()
.name(String::from("index_deleter"))
.spawn(move || {
// We first wait to be sure that the previously opened index is effectively closed.
// This can take a lot of time, this is why we do that in a separate thread.
if let Some(closing_event) = closing_event {
closing_event.wait();
}
// Then we remove the content from disk.
if let Err(e) = fs::remove_dir_all(&index_path) {
error!(
"An error happened when deleting the index {} ({}): {}",
index_name, uuid, e
);
}
// Finally we remove the entry from the index map.
assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted)));
})
.unwrap();
Ok(())
}
/// Returns `true` if `name` is present in the index-name → uuid mapping table.
pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
    let uuid = self.index_mapping.get(rtxn, name)?;
    Ok(uuid.is_some())
}
/// Return an index, may open it if it wasn't already opened.
///
/// # Errors
///
/// Returns `Error::IndexNotFound` if `name` is not in the mapping table or if the index
/// is currently being deleted, and forwards any error raised while opening the index.
///
/// # Panics
///
/// Panics if the lock protecting the index map is poisoned.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (eg if two search happens
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None)?;
entry.insert(Available(index.clone()));
index
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
Ok(index)
}
/// Return every index of the mapping table together with its name, opening the indexes
/// that were not opened yet.
///
/// Stops at, and returns, the first error encountered.
pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<(String, Index)>> {
    let mut indexes = Vec::new();
    for entry in self.index_mapping.iter(rtxn)? {
        let (name, _uuid) = entry?;
        let index = self.index(rtxn, name)?;
        indexes.push((name.to_string(), index));
    }
    Ok(indexes)
}
/// Swap two index names.
pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
let lhs_uuid = self
.index_mapping
.get(wtxn, lhs)?
.ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?;
let rhs_uuid = self
.index_mapping
.get(wtxn, rhs)?
.ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?;
self.index_mapping.put(wtxn, lhs, &rhs_uuid)?;
self.index_mapping.put(wtxn, rhs, &lhs_uuid)?;
Ok(())
}
/// Returns `true` if `name` is present in the mapping table; same check as `exists`.
pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
    self.exists(rtxn, name)
}
/// Borrows the indexer configuration shared by this mapper.
pub fn indexer_config(&self) -> &IndexerConfig {
    &*self.indexer_config
}
}

View File

@@ -1,370 +0,0 @@
/// The map size to use when the map size of an index cannot be read
/// (fallback used by `IndexMap::close`).
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
use crate::lru::{InsertionOutcome, LruMap};
use crate::{clamp_to_page_size, Result};
/// Keep an internally consistent view of the open indexes in memory.
///
/// This view is made of an LRU cache that will evict the least recently used indexes when new indexes are opened.
/// Indexes that are being closed (for resizing or due to cache eviction) or deleted cannot be evicted from the cache and
/// are stored separately.
///
/// This view provides operations to change the state of the index as it is known in memory:
/// open an index (making it available for queries), close an index (specifying the new size it should be opened with),
/// delete an index.
///
/// External consistency with the other bits of data of an index is provided by the `IndexMapper` parent structure.
pub struct IndexMap {
/// A LRU map of indexes that are in the open state and available for queries.
available: LruMap<Uuid, Index>,
/// A map of indexes that are not available for queries, either because they are being deleted
/// or because they are being closed.
///
/// If they are being deleted, the UUID points to `None`.
/// If they are being closed, the UUID points to `Some(ClosingIndex)`.
unavailable: BTreeMap<Uuid, Option<ClosingIndex>>,
/// A monotonically increasing generation number, used to differentiate between multiple successive index closing requests.
///
/// Because multiple readers could be waiting on an index to close, the following could theoretically happen:
///
/// 1. Multiple readers wait for the index closing to occur.
/// 2. One of them "wins the race", takes the lock and then removes the index that finished closing from the map.
/// 3. The index is reopened, but must be closed again (such as being resized again).
/// 4. One reader that "lost the race" in (2) wakes up and tries to take the lock and remove the index from the map.
///
/// In that situation, the index may or may not have finished closing. The `generation` field allows to remember which
/// closing request was made, so the reader that "lost the race" has the old generation and will need to wait again for the index
/// to close.
generation: usize,
}
/// An index that was asked to close and whose environment may still be closing.
///
/// Stored in the `unavailable` map of `IndexMap` and handed, as a clone, to the callers
/// that want to wait for the closing to complete.
#[derive(Clone)]
pub struct ClosingIndex {
/// Uuid of the index being closed.
uuid: Uuid,
/// Event used to wait until the environment of the index is closed.
closing_event: EnvClosingEvent,
/// Map size the index should be given if it is opened again.
map_size: usize,
/// Generation of the closing request that created this value (see `IndexMap::generation`).
generation: usize,
}
impl ClosingIndex {
    /// Waits at most `timeout` for the index to be definitely closed.
    ///
    /// To avoid blocking, users should relinquish their locks to the IndexMap before calling this function.
    ///
    /// Returns `None` if the index was not closed when the timeout elapsed.
    ///
    /// After the index is physically closed, the in memory map must still be updated to take this into account.
    /// To do so, a `ReopenableIndex` is returned, that can be used to either definitely close or definitely open
    /// the index without waiting anymore.
    pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
        let Self { uuid, closing_event, map_size, generation } = self;
        let reopenable = ReopenableIndex { uuid, map_size, generation };

        if closing_event.wait_timeout(timeout) {
            Some(reopenable)
        } else {
            None
        }
    }
}
/// Token returned by `ClosingIndex::wait_timeout` once the index is physically closed.
///
/// It must be consumed with either `reopen` or `close` to update the in-memory map.
pub struct ReopenableIndex {
/// Uuid of the index that finished closing.
uuid: Uuid,
/// Map size to use if the index is reopened.
map_size: usize,
/// Generation of the closing request this token belongs to.
generation: usize,
}
impl ReopenableIndex {
/// Attempts to reopen the index with the map size recorded when it was closed.
///
/// Nothing happens unless the map still reports the index as `Closing` with the same
/// generation as this token (e.g. another thread may already have reopened and closed
/// the index again).
///
/// Use get again on the IndexMap to get the updated status.
///
/// Fails if the underlying index creation fails.
///
/// # Status table
///
/// | Previous Status | New Status                                   |
/// |-----------------|----------------------------------------------|
/// | Missing         | Missing                                      |
/// | BeingDeleted    | BeingDeleted                                 |
/// | Closing         | Available or Closing depending on generation |
/// | Available       | Available                                    |
///
pub fn reopen(self, map: &mut IndexMap, path: &Path) -> Result<()> {
    match map.get(&self.uuid) {
        Closing(current) if current.generation == self.generation => {
            // The entry must leave `unavailable` first, `create` panics otherwise.
            map.unavailable.remove(&self.uuid);
            map.create(&self.uuid, path, None, self.map_size)?;
            Ok(())
        }
        _ => Ok(()),
    }
}
/// Attempts to definitely close the index by forgetting it from the map.
///
/// Nothing happens unless the map still reports the index as `Closing` with the same
/// generation as this token (e.g. another thread may already have reopened the index).
///
/// Use get again on the IndexMap to get the updated status.
///
/// # Status table
///
/// | Previous Status | New Status                                 |
/// |-----------------|--------------------------------------------|
/// | Missing         | Missing                                    |
/// | BeingDeleted    | BeingDeleted                               |
/// | Closing         | Missing or Closing depending on generation |
/// | Available       | Available                                  |
pub fn close(self, map: &mut IndexMap) {
    match map.get(&self.uuid) {
        Closing(current) if current.generation == self.generation => {
            map.unavailable.remove(&self.uuid);
        }
        _ => {}
    }
}
}
impl IndexMap {
pub fn new(cap: usize) -> IndexMap {
Self { unavailable: Default::default(), available: LruMap::new(cap), generation: 0 }
}
/// Gets the current status of an index in the map.
///
/// The available indexes are looked up first; if the index is available, a clone of it
/// can be accessed from the returned status.
pub fn get(&self, uuid: &Uuid) -> IndexStatus {
    match self.available.get(uuid) {
        Some(index) => Available(index.clone()),
        None => self.get_unavailable(uuid),
    }
}
/// Status of an index according to the `unavailable` map only: `Closing`, `BeingDeleted`,
/// or `Missing` when the map has no entry for `uuid`.
fn get_unavailable(&self, uuid: &Uuid) -> IndexStatus {
    let Some(entry) = self.unavailable.get(uuid) else {
        return Missing;
    };
    match entry {
        Some(closing) => Closing(closing.clone()),
        None => BeingDeleted,
    }
}
/// Attempts to create (or open from `path`) an index that was not in the map before,
/// and makes it available.
///
/// If inserting the index evicts another one from the LRU, the evicted index is closed
/// without any map size growth.
///
/// # Status table
///
/// | Previous Status | New Status |
/// |-----------------|------------|
/// | Missing         | Available  |
/// | BeingDeleted    | panics     |
/// | Closing         | panics     |
/// | Available       | panics     |
///
pub fn create(
    &mut self,
    uuid: &Uuid,
    path: &Path,
    date: Option<(OffsetDateTime, OffsetDateTime)>,
    map_size: usize,
) -> Result<Index> {
    match self.get_unavailable(uuid) {
        Missing => {}
        _ => panic!("Attempt to open an index that was unavailable"),
    }

    let index = create_or_open_index(path, date, map_size)?;
    let outcome = self.available.insert(*uuid, index.clone());
    match outcome {
        InsertionOutcome::InsertedNew => {}
        InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
            self.close(evicted_uuid, evicted_index, 0);
        }
        InsertionOutcome::Replaced(_) => {
            panic!("Attempt to open an index that was already opened")
        }
    }
    Ok(index)
}
/// Increases the current generation and returns the new value. See documentation for this field.
///
/// In the unlikely event that every generation representable by a `usize` would have been
/// exhausted, we simply wrap-around.
///
/// For this to cause an issue, one should be able to stop a reader in time after it got a `ReopenableIndex` and before it takes the lock
/// to remove it from the unavailable map, and keep the reader in this frozen state for that many closings of other indexes.
///
/// This seems overwhelmingly impossible to achieve in practice.
fn next_generation(&mut self) -> usize {
    let next = self.generation.wrapping_add(1);
    self.generation = next;
    next
}
/// Attempts to close an available index so that it can later be reopened with a map size
/// increased by `map_size_growth`. Does nothing if the index is not available.
///
/// # Status table
///
/// | Previous Status | New Status    |
/// |-----------------|---------------|
/// | Missing         | Missing       |
/// | BeingDeleted    | BeingDeleted  |
/// | Closing         | Closing       |
/// | Available       | Closing       |
///
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
    if let Some(index) = self.available.remove(uuid) {
        self.close(*uuid, index, map_size_growth);
    }
}
/// Starts closing `index` and records it as `Closing` under a new generation.
///
/// The map size remembered for a later reopening is the current map size of the index
/// (or `DEFAULT_MAP_SIZE` when it cannot be read) plus `map_size_growth`.
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
    let current_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE);
    let map_size = current_size + map_size_growth;
    let closing_event = index.prepare_for_closing();
    let generation = self.next_generation();

    let closing = ClosingIndex { uuid, closing_event, map_size, generation };
    self.unavailable.insert(uuid, Some(closing));
}
/// Attempts to delete an index.
///
/// `end_deletion` must be called just after a successful (`Ok`) call.
///
/// On `Err`, the status of the index is left untouched:
/// - `Err(Some(closing))`: the index is being closed; the caller can wait on the returned
///   `ClosingIndex`, settle it, and try again.
/// - `Err(None)`: a deletion of this index is already in progress.
///
/// # Status table
///
/// | Previous Status | New Status   | Return value                |
/// |-----------------|--------------|-----------------------------|
/// | Missing         | BeingDeleted | Ok(None)                    |
/// | BeingDeleted    | BeingDeleted | Err(None)                   |
/// | Closing         | Closing      | Err(Some(reopen))           |
/// | Available       | BeingDeleted | Ok(Some(env_closing_event)) |
pub fn start_deletion(
    &mut self,
    uuid: &Uuid,
) -> std::result::Result<Option<EnvClosingEvent>, Option<ClosingIndex>> {
    if let Some(index) = self.available.remove(uuid) {
        self.unavailable.insert(*uuid, None);
        return Ok(Some(index.prepare_for_closing()));
    }

    // Only look at the entry: removing it here would silently turn a `Closing` or
    // `BeingDeleted` index into a `Missing` one, contradicting the table above.
    match self.unavailable.get(uuid).cloned() {
        Some(Some(reopen)) => Err(Some(reopen)),
        Some(None) => Err(None),
        None => {
            // The deletion starts now: mark the index so that it cannot be opened until
            // `end_deletion` is called.
            self.unavailable.insert(*uuid, None);
            Ok(None)
        }
    }
}
/// Records that the deletion of an index is over.
///
/// Must be used after calling `start_deletion`.
///
/// # Status table
///
/// | Previous Status | New Status |
/// |-----------------|------------|
/// | Missing         | Missing    |
/// | BeingDeleted    | Missing    |
/// | Closing         | panics     |
/// | Available       | panics     |
///
/// # Panics
///
/// If the index is found in the available map, or if its entry in the unavailable map is a closing index.
pub fn end_deletion(&mut self, uuid: &Uuid) {
    if self.available.get(uuid).is_some() {
        panic!("Attempt to finish deletion of an index that was not being deleted");
    }
    // The entry is taken out of the unavailable map in every case.
    // A Missing or BeingDeleted index is fine; only a closing index is a misuse.
    if let Some(Some(_)) = self.unavailable.remove(uuid) {
        panic!("Attempt to finish deletion of an index that was being closed");
    }
}
}
/// Creates or opens the index located at `path`.
///
/// The environment is opened with `map_size` clamped to the page size and with at most 1024 readers.
/// When `date` is provided, its two values are forwarded as the creation and update dates of the index.
///
/// The path *must* exist or an error will be thrown.
fn create_or_open_index(
    path: &Path,
    date: Option<(OffsetDateTime, OffsetDateTime)>,
    map_size: usize,
) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    options.map_size(clamp_to_page_size(map_size));
    options.max_readers(1024);

    let index = match date {
        Some((created, updated)) => {
            Index::new_with_creation_dates(options, path, created, updated)?
        }
        None => Index::new(options, path)?,
    };
    Ok(index)
}
/// Putting the tests of the LRU down there so we have access to the cache's private members
#[cfg(test)]
mod tests {
use meilisearch_types::heed::Env;
use meilisearch_types::Index;
use uuid::Uuid;
use super::super::IndexMapper;
use crate::tests::IndexSchedulerHandle;
use crate::utils::clamp_to_page_size;
use crate::IndexScheduler;
impl IndexMapper {
/// Builds a test `IndexScheduler` and returns its index mapper and environment together with the
/// scheduler handle.
fn test() -> (Self, Env, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle)
}
}
/// Asserts that the first entry of the unavailable map has the key `expected_uuid`, and that this entry
/// holds a closing index (`Some`) exactly when `is_closing` is true.
fn check_first_unavailable(mapper: &IndexMapper, expected_uuid: Uuid, is_closing: bool) {
let index_map = mapper.index_map.read().unwrap();
let (uuid, state) = index_map.unavailable.first_key_value().unwrap();
assert_eq!(uuid, &expected_uuid);
assert_eq!(state.is_some(), is_closing);
}
/// Creates one more index than the cache holds and checks which index ends up in the unavailable map.
#[test]
fn evict_indexes() {
let (mapper, env, _handle) = IndexMapper::test();
let mut uuids = vec![];
// LRU cap + 1
// NOTE(review): 5 is presumably the `index_count` the test scheduler is configured with -- confirm.
for i in 0..(5 + 1) {
let index_name = format!("index-{i}");
let wtxn = env.write_txn().unwrap();
mapper.create_index(wtxn, &index_name, None).unwrap();
let txn = env.read_txn().unwrap();
uuids.push(mapper.index_mapping.get(&txn, &index_name).unwrap().unwrap());
}
// index-0 was evicted
check_first_unavailable(&mapper, uuids[0], true);
// get back the evicted index
// (`create_index` is a get-or-create, so this fetches the existing index-0 rather than creating a new one)
let wtxn = env.write_txn().unwrap();
mapper.create_index(wtxn, "index-0", None).unwrap();
// Least recently used is now index-1
check_first_unavailable(&mapper, uuids[1], true);
}
/// Checks that each call to `resize_index` adds `index_growth_amount` to the map size the index is
/// opened with afterwards.
#[test]
fn resize_index() {
let (mapper, env, _handle) = IndexMapper::test();
// Freshly created: opened with the base map size.
let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
assert_index_size(index, mapper.index_base_map_size);
// After one resize, fetching the index again yields the base size plus one growth amount.
mapper.resize_index(&env.read_txn().unwrap(), "index").unwrap();
let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
assert_index_size(index, mapper.index_base_map_size + mapper.index_growth_amount);
// After a second resize, two growth amounts have been added.
mapper.resize_index(&env.read_txn().unwrap(), "index").unwrap();
let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
assert_index_size(index, mapper.index_base_map_size + mapper.index_growth_amount * 2);
}
/// Asserts that the map size of `index` equals `expected` once clamped to the page size.
fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size().unwrap();
assert_eq!(index_map_size, expected);
}
}

View File

@@ -1,370 +0,0 @@
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
mod index_map;
const INDEX_MAPPING: &str = "index-mapping";
/// Structure managing meilisearch's indexes.
///
/// It is responsible for:
/// 1. Creating new indexes
/// 2. Opening indexes and storing references to these opened indexes
/// 3. Accessing indexes through their uuid
/// 4. Mapping a user-defined name to each index uuid.
///
/// # Implementation notes
///
/// An index exists as 3 bits of data:
/// 1. The index data on disk, that can exist in 3 states: Missing, Present, or BeingDeleted.
/// 2. The persistent database containing the association between the index' name and its UUID,
/// that can exist in 2 states: Missing or Present.
/// 3. The state of the index in the in-memory `IndexMap`, that can exist in multiple states:
/// - Missing
/// - Available
/// - Closing (because an index needs resizing or was evicted from the cache)
/// - BeingDeleted
///
/// All of this data should be kept consistent between index operations, which is achieved by the `IndexMapper`
/// with the use of the following primitives:
/// - A RwLock on the `IndexMap`.
/// - Transactions on the association database.
/// - ClosingEvent signals emitted when closing an environment.
///
/// Cloning an `IndexMapper` is cheap with regard to the index map and the indexer configuration: both are
/// behind an `Arc`, so clones share them.
#[derive(Clone)]
pub struct IndexMapper {
/// Keep track of the opened indexes. Used mainly by the index resolver.
index_map: Arc<RwLock<IndexMap>>,
/// Map an index name with an index uuid currently available on disk.
pub(crate) index_mapping: Database<Str, UuidCodec>,
/// Path to the folder where the LMDB environments of each index are.
/// The environment of an index lives in the sub-folder named after its uuid.
base_path: PathBuf,
/// The map size an index is opened with on the first time.
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// The configuration used by the indexer, shared between all the clones of the mapper.
pub indexer_config: Arc<IndexerConfig>,
}
/// Whether the index is available for use or is forbidden to be inserted back in the index map
///
/// This is the in-memory state of an index, as seen by the `IndexMapper` when it looks an index up by uuid.
// NOTE(review): the lint is presumably silenced because the data-carrying variants are much larger than
// the unit variants -- confirm before boxing anything.
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
pub enum IndexStatus {
/// Not currently in the index map.
Missing,
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// Temporarily do not insert the index in the index map as it is currently being resized/evicted from the map.
///
/// The payload can be waited on until the index has finished closing.
Closing(index_map::ClosingIndex),
/// You can use the index without worrying about anything.
Available(Index),
}
impl IndexMapper {
/// Builds an `IndexMapper` on top of the given environment.
///
/// - `env`: environment in which the name-to-uuid database (`INDEX_MAPPING`) is created or opened.
/// - `base_path`: folder containing the environments of the indexes.
/// - `index_base_map_size`: map size used the first time an index is opened.
/// - `index_growth_amount`: number of bytes added to the map size of an index when it is resized.
/// - `index_count`: forwarded to `IndexMap::new`.
/// - `indexer_config`: configuration of the indexer, stored behind an `Arc`.
///
/// # Errors
///
/// Fails if the mapping database cannot be created.
pub fn new(
    env: &Env,
    base_path: PathBuf,
    index_base_map_size: usize,
    index_growth_amount: usize,
    index_count: usize,
    indexer_config: IndexerConfig,
) -> Result<Self> {
    let index_map = Arc::new(RwLock::new(IndexMap::new(index_count)));
    let index_mapping = env.create_database(Some(INDEX_MAPPING))?;
    let indexer_config = Arc::new(indexer_config);

    Ok(Self {
        index_map,
        index_mapping,
        base_path,
        index_base_map_size,
        index_growth_amount,
        indexer_config,
    })
}
/// Get or create the index.
pub fn create_index(
&self,
mut wtxn: RwTxn,
name: &str,
date: Option<(OffsetDateTime, OffsetDateTime)>,
) -> Result<Index> {
match self.index(&wtxn, name) {
Ok(index) => {
wtxn.commit()?;
Ok(index)
}
Err(Error::IndexNotFound(_)) => {
let uuid = Uuid::new_v4();
self.index_mapping.put(&mut wtxn, name, &uuid)?;
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
let index = self.index_map.write().unwrap().create(
&uuid,
&index_path,
date,
self.index_base_map_size,
)?;
wtxn.commit()?;
Ok(index)
}
error => error,
}
}
/// Removes the index from the mapping table and the in-memory index map
/// but keeps the associated tasks.
///
/// The association between `name` and the uuid is removed and committed first. The files of the index are
/// removed afterwards in a dedicated `index_deleter` thread, so this function can return before the
/// data has actually been deleted from disk.
///
/// # Errors
///
/// - `Error::IndexNotFound` if `name` is not in the mapping table.
/// - Any error raised while reading, updating or committing the mapping table.
///
/// # Panics
///
/// - If the mapping table reports that nothing was deleted for a name that was just found in it.
/// - If the lock on the index map is poisoned.
/// - If the index is still reported as closing after too many attempts (see the comment in the body).
/// - If the deleter thread cannot be spawned.
pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(&wtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// Once we retrieved the UUID of the index we remove it from the mapping table.
assert!(self.index_mapping.delete(&mut wtxn, name)?);
wtxn.commit()?;
let mut tries = 0;
// Attempts to remove the index from the in-memory index map in a loop.
//
// If the index is currently being closed, we will wait for it to be closed and retry getting it in a subsequent
// loop iteration.
//
// We make 100 attempts before giving up.
// This could happen in the following situations:
//
// 1. There is a bug preventing the index from being correctly closed, or us from detecting this.
// 2. A user of the index is keeping it open for more than 600 seconds. This could happen e.g. during a pathological search.
// This can not be caused by indexation because deleting an index happens in the scheduler itself, so cannot be concurrent with indexation.
//
// In these situations, reporting the error through a panic is in order.
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
match lock.start_deletion(&uuid) {
Ok(env_closing) => break env_closing,
Err(Some(reopen)) => {
// drop the lock here so that we don't synchronously wait for the index to close.
drop(lock);
tries += 1;
// NOTE(review): the counter is checked before waiting, so the panic happens the 100th time the
// index is seen closing, i.e. after at most 99 waits of 6 seconds.
if tries >= 100 {
panic!("Too many attempts to close index {name} prior to deletion.")
}
let reopen = if let Some(reopen) = reopen.wait_timeout(Duration::from_secs(6)) {
reopen
} else {
continue;
};
reopen.close(&mut self.index_map.write().unwrap());
continue;
}
// Reported by `start_deletion` when the index is already being deleted: nothing left to do here.
Err(None) => return Ok(()),
}
};
// Everything the deleter thread needs is cloned or moved so that the thread can outlive `self`.
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
thread::Builder::new()
.name(String::from("index_deleter"))
.spawn(move || {
// We first wait to be sure that the previously opened index is effectively closed.
// This can take a lot of time, this is why we do that in a separate thread.
if let Some(closing_event) = closing_event {
closing_event.wait();
}
// Then we remove the content from disk.
// A failure is only logged: the entry is still removed from the index map below.
if let Err(e) = fs::remove_dir_all(&index_path) {
error!(
"An error happened when deleting the index {} ({}): {}",
index_name, uuid, e
);
}
// Finally we remove the entry from the index map.
index_map.write().unwrap().end_deletion(&uuid);
})
.unwrap();
Ok(())
}
/// Returns `true` if `name` is associated with a uuid in the mapping table.
///
/// The index itself is not opened.
pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
    let uuid = self.index_mapping.get(rtxn, name)?;
    Ok(uuid.is_some())
}
/// Schedules a resize of the specified index: its maximum size is increased by `index_growth_amount` bytes.
///
/// The index is only closed here, with the increased map size recorded alongside the closing index.
/// Closing the underlying environment can take a long time to complete; this function does not wait for it.
///
/// If the index is not currently available in the in-memory index map (it is missing, already closing or
/// being deleted), this is a no-op.
///
/// # Errors
///
/// - `Error::IndexNotFound` if `name` is not in the mapping table.
/// - Any error raised while reading the mapping table.
///
/// # Panics
///
/// - If the lock on the index map is poisoned.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
    let uuid = self
        .index_mapping
        .get(rtxn, name)?
        .ok_or_else(|| Error::IndexNotFound(name.to_string()))?;

    // We remove the index from the in-memory index map.
    self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);

    Ok(())
}
/// Return an index, may open it if it wasn't already opened.
///
/// The uuid associated with `name` is read from the mapping table, then the in-memory index map is
/// queried in a loop until the index is available:
/// - an available index is returned directly,
/// - a closing index is waited on (6 seconds at a time) and then reopened,
/// - a missing index is opened with the base map size.
///
/// # Errors
///
/// - `Error::IndexNotFound` if `name` is not in the mapping table, or if the index is being deleted.
/// - Any error raised while reading the mapping table or while opening/reopening the index.
///
/// # Panics
///
/// - If the lock on the index map is poisoned.
/// - If the index could not be obtained after 100 iterations of the loop.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
let mut tries = 0;
// attempts to open the index in a loop.
//
// If the index is currently being closed, we will wait for it to be closed and retry getting it in a subsequent
// loop iteration.
//
// We make 100 attempts before giving up.
// This could happen in the following situations:
//
// 1. There is a bug preventing the index from being correctly closed, or us from detecting it was.
// 2. A user of the index is keeping it open for more than 600 seconds. This could happen e.g. during a long indexation,
// a pathological search, and so on.
//
// In these situations, reporting the error through a panic is in order.
let index = loop {
tries += 1;
if tries > 100 {
panic!("Too many spurious wake ups while trying to open the index {name}");
}
// we get the index here to drop the lock before entering the match
let index = self.index_map.read().unwrap().get(&uuid);
match index {
Available(index) => break index,
Closing(reopen) => {
// Avoiding deadlocks: no lock taken while doing this operation.
let reopen = if let Some(reopen) = reopen.wait_timeout(Duration::from_secs(6)) {
reopen
} else {
// timed out: go through the loop again, which counts as one more try
continue;
};
let index_path = self.base_path.join(uuid.to_string());
// take the lock to reopen the environment.
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
// the status is read again on the next iteration rather than assumed to be available
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
Missing => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (eg if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
match index_map.get(&uuid) {
Missing => {
let index_path = self.base_path.join(uuid.to_string());
break index_map.create(
&uuid,
&index_path,
None,
self.index_base_map_size,
)?;
}
Available(index) => break index,
Closing(_) => {
// the reopening will be handled in the next loop operation
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
}
}
}
};
Ok(index)
}
/// Attempts `f` for each index that exists in the index mapper.
///
/// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened,
/// which is unsupported in general.
///
/// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of
/// all the indexes, but this function makes it harder and so less likely to do accidentally.
pub fn try_for_each_index<U, V>(
&self,
rtxn: &RoTxn,
mut f: impl FnMut(&str, &Index) -> Result<U>,
) -> Result<V>
where
V: FromIterator<U>,
{
self.index_mapping
.iter(rtxn)?
.map(|res| {
res.map_err(Error::from)
.and_then(|(name, _)| self.index(rtxn, name).and_then(|index| f(name, &index)))
})
.collect()
}
/// Return the name of all indexes without opening them.
pub fn index_names(&self, rtxn: &RoTxn) -> Result<Vec<String>> {
self.index_mapping
.iter(rtxn)?
.map(|res| res.map_err(Error::from).map(|(name, _)| name.to_string()))
.collect()
}
/// Swap two index names.
pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
let lhs_uuid = self
.index_mapping
.get(wtxn, lhs)?
.ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?;
let rhs_uuid = self
.index_mapping
.get(wtxn, rhs)?
.ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?;
self.index_mapping.put(wtxn, lhs, &rhs_uuid)?;
self.index_mapping.put(wtxn, rhs, &lhs_uuid)?;
Ok(())
}
/// Returns `true` if `name` is associated with a uuid in the mapping table.
///
/// Same as [`IndexMapper::exists`], which it delegates to so that the lookup has a single implementation.
pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
    self.exists(rtxn, name)
}
/// Borrows the indexer configuration held by this mapper.
pub fn indexer_config(&self) -> &IndexerConfig {
    let config: &IndexerConfig = &*self.indexer_config;
    config
}
}

View File

@@ -33,8 +33,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snapshots_path: _,
auth_path: _,
version_file_path: _,
cluster: _,
consistency_level: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
@@ -256,6 +254,6 @@ pub fn snapshot_canceled_by(
snap
}
pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let names = mapper.index_names(rtxn).unwrap();
let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::<Vec<_>>();
format!("{names:?}")
}

View File

@@ -24,27 +24,22 @@ pub mod error;
mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
mod utils;
mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::io::Write;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::Relaxed;
use std::sync::{Arc, RwLock};
use std::time::Duration;
use batch::Batch;
use cluster::{Cluster, Consistency};
use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
use file_store::FileStore;
use log::info;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
@@ -54,7 +49,6 @@ use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use serde::Deserialize;
use synchronoise::SignalEvent;
use time::OffsetDateTime;
use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound};
@@ -235,12 +229,8 @@ pub struct IndexSchedulerOptions {
pub dumps_path: PathBuf,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which each meilisearch index is opened the first time.
pub index_base_map_size: usize,
/// The size, in bytes, by which the map size of an index is increased when it is resized due to being full.
pub index_growth_amount: usize,
/// The number of indexes that can be concurrently opened in memory.
pub index_count: usize,
/// The maximum size, in bytes, of each meilisearch index.
pub index_size: usize,
/// Configuration used during indexing for each meilisearch index.
pub indexer_config: IndexerConfig,
/// Set to `true` iff the index scheduler is allowed to automatically
@@ -307,11 +297,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
/// The role in the cluster
pub(crate) cluster: Option<Cluster>,
/// The Consistency level used by the leader. Ignored if the node is not a leader in cluster mode.
pub(crate) consistency_level: Consistency,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -331,24 +316,6 @@ pub struct IndexScheduler {
run_loop_iteration: Arc<RwLock<usize>>,
}
/// The role a node can be given in a cluster.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
pub enum ClusterMode {
/// The node is the leader of the cluster.
Leader,
/// The node is a follower in the cluster.
Follower,
}
impl std::str::FromStr for ClusterMode {
type Err = ();
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"leader" => Ok(ClusterMode::Leader),
"follower" => Ok(ClusterMode::Follower),
_ => Err(()),
}
}
}
impl IndexScheduler {
fn private_clone(&self) -> IndexScheduler {
IndexScheduler {
@@ -371,8 +338,6 @@ impl IndexScheduler {
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(),
cluster: self.cluster.clone(),
consistency_level: self.consistency_level,
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)]
@@ -387,8 +352,6 @@ impl IndexScheduler {
/// Create an index scheduler and start its run loop.
pub fn new(
options: IndexSchedulerOptions,
cluster: Option<Cluster>,
consistency_level: Consistency,
#[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
) -> Result<Self> {
@@ -397,25 +360,9 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.indexes_path)?;
std::fs::create_dir_all(&options.dumps_path)?;
let task_db_size = clamp_to_page_size(options.task_db_size);
let budget = if options.indexer_config.skip_index_budget {
IndexBudget {
map_size: options.index_base_map_size,
index_count: options.index_count,
task_db_size,
}
} else {
Self::index_budget(
&options.tasks_path,
options.index_base_map_size,
task_db_size,
options.index_count,
)
};
let env = heed::EnvOpenOptions::new()
.max_dbs(10)
.map_size(budget.task_db_size)
.map_size(clamp_to_page_size(options.task_db_size))
.open(options.tasks_path)?;
let file_store = FileStore::new(&options.update_file_path)?;
@@ -435,9 +382,7 @@ impl IndexScheduler {
index_mapper: IndexMapper::new(
&env,
options.indexes_path,
budget.map_size,
options.index_growth_amount,
budget.index_count,
options.index_size,
options.indexer_config,
)?,
env,
@@ -448,8 +393,6 @@ impl IndexScheduler {
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
version_file_path: options.version_file_path,
cluster,
consistency_level,
#[cfg(test)]
test_breakpoint_sdr,
@@ -463,75 +406,6 @@ impl IndexScheduler {
Ok(this)
}
/// Computes how many indexes can be opened simultaneously, and with which map size.
///
/// The total budget is `DEFAULT_BUDGET` when an environment of that size can be opened at `tasks_path`,
/// otherwise it is determined by a dichotomic search. Half of that total is then shared between the task
/// database and the indexes:
/// - the task database gets `task_db_size`, lowered to two fifths of the half (clamped to the page size)
///   when it would otherwise take more than half of it;
/// - the rest is divided by `base_map_size` to get the number of indexes, minus one to keep room for an
///   additional index when the cache is full, and capped by `max_index_count`.
///
/// When fewer than two indexes of `base_map_size` fit, a single index is budgeted, with a map size of two
/// fifths of what remains.
fn index_budget(
    tasks_path: &Path,
    base_map_size: usize,
    mut task_db_size: usize,
    max_index_count: usize,
) -> IndexBudget {
    #[cfg(windows)]
    const DEFAULT_BUDGET: usize = 6 * 1024 * 1024 * 1024 * 1024; // 6 TiB, 1 index
    #[cfg(not(windows))]
    const DEFAULT_BUDGET: usize = 80 * 1024 * 1024 * 1024 * 1024; // 80 TiB, 18 indexes

    let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
        DEFAULT_BUDGET
    } else {
        log::debug!("determining budget with dichotomic search");
        utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
            Self::is_good_heed(tasks_path, map_size)
        })
    };
    log::debug!("memmap budget: {budget}B");

    // Only half of the measured budget is shared between the task database and the indexes.
    let half = budget / 2;
    if task_db_size > (half / 2) {
        task_db_size = clamp_to_page_size(half * 2 / 5);
        log::debug!(
            "Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
        );
    }
    let budget = half - task_db_size;
    log::debug!("index budget: {budget}B");

    let affordable = budget / base_map_size;
    if affordable < 2 {
        // take a bit less than half of the budget to make sure we can always afford to open an index
        let map_size = (budget * 2) / 5;
        // single index of max budget
        log::debug!("1 index of {map_size}B can be opened simultaneously.");
        return IndexBudget { map_size, index_count: 1, task_db_size };
    }

    // Keep some space for an additional index when the cache is already full.
    // The subtraction cannot underflow because `affordable >= 2`.
    let index_count = (affordable - 1).min(max_index_count);
    log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");

    IndexBudget { map_size: base_map_size, index_count, task_db_size }
}
/// Returns whether an environment with the given map size (clamped to the page size) can be opened at
/// `tasks_path`.
///
/// On success the environment is closed again, and the function waits for the closing to complete.
fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool {
    let opened =
        heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path);
    match opened {
        Ok(env) => {
            env.prepare_for_closing().wait();
            true
        }
        // All errors are treated equally here, not only allocation errors.
        // This means the budget can be lowered because of errors that are unrelated to allocation.
        // For persistent errors, this is OK as long as the task db is then reopened normally, without
        // ignoring the error this time.
        // For transient errors, this could lead to an instance with too low a budget.
        // However transient errors are: 1) less likely than persistent errors 2) likely to cause other
        // issues down the line anyway.
        Err(_) => false,
    }
}
pub fn read_txn(&self) -> Result<RoTxn> {
self.env.read_txn().map_err(|e| e.into())
}
@@ -542,38 +416,18 @@ impl IndexScheduler {
/// only once per index scheduler.
fn run(&self) {
let run = self.private_clone();
// if we're a follower we starts a thread to register the tasks coming from the leader
if let Some(Cluster::Follower(ref follower)) = self.cluster {
let this = self.private_clone();
let follower = follower.clone();
std::thread::spawn(move || loop {
let (task, content) = follower.get_new_task();
this.register_raw_task(task, content);
});
} else if let Some(Cluster::Leader(ref leader)) = self.cluster {
// we need a way to let the leader come out of its loop if a new follower joins the cluster
let cluster = leader.wake_up.clone();
let scheduler = self.wake_up.clone();
std::thread::spawn(move || loop {
cluster.wait();
scheduler.signal();
});
}
std::thread::Builder::new()
.name(String::from("scheduler"))
.spawn(move || {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
run.wake_up.wait();
loop {
run.wake_up.wait();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Ok(0) => (),
Ok(_) => run.wake_up.signal(),
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
@@ -586,6 +440,7 @@ impl IndexScheduler {
) {
std::thread::sleep(Duration::from_secs(1));
}
run.wake_up.signal();
}
}
}
@@ -605,42 +460,15 @@ impl IndexScheduler {
///
/// * If the index wasn't opened before, the index will be opened.
/// * If the index doesn't exist on disk, the `IndexNotFoundError` is thrown.
///
/// ### Note
///
/// As an `Index` requires a large swath of the virtual memory address space, correct usage of an `Index` does not
/// keep its handle for too long.
///
/// Some configurations also can't reasonably open multiple indexes at once.
/// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}
/// Return the name of all indexes without opening them.
pub fn index_names(self) -> Result<Vec<String>> {
/// Return and open all the indexes.
pub fn indexes(&self) -> Result<Vec<(String, Index)>> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index_names(&rtxn)
}
/// Attempts `f` for each index that exists known to the index scheduler.
///
/// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened,
/// which is unsupported in general.
///
/// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of
/// all the indexes, but this function makes it harder and so less likely to do accidentally.
///
/// If many indexes exist, this operation can take time to complete (in the order of seconds for a 1000 of indexes) as it needs to open
/// all the indexes.
pub fn try_for_each_index<U, V>(&self, f: impl FnMut(&str, &Index) -> Result<U>) -> Result<V>
where
V: FromIterator<U>,
{
let rtxn = self.env.read_txn()?;
self.index_mapper.try_for_each_index(&rtxn, f)
self.index_mapper.indexes(&rtxn)
}
/// Return the task ids matched by the given query from the index scheduler's point of view.
@@ -919,16 +747,6 @@ impl IndexScheduler {
return Err(e.into());
}
if let Some(Cluster::Leader(leader)) = &self.cluster {
let update_file = if let Some(uuid) = task.content_uuid() {
let path = self.file_store.get_update_path(uuid);
Some(std::fs::read(path).unwrap())
} else {
None
};
leader.register_new_task(task.clone(), update_file);
}
// If the registered task is a task cancelation
// we inform the processing tasks to stop (if necessary).
if let KindWithContent::TaskCancelation { tasks, .. } = kind {
@@ -945,8 +763,8 @@ impl IndexScheduler {
Ok(task)
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
@@ -1058,44 +876,6 @@ impl IndexScheduler {
Ok(task)
}
/// Registers a task exactly as it is given, along with the content of its update file if any.
///
/// /!\ should only be used when you're a follower in cluster mode
///
/// The update file, when provided, is persisted under the content uuid of the task. The task is then stored
/// and referenced in the index, status, kind and enqueued-at databases in a single write transaction, and
/// the scheduler is woken up.
///
/// # Panics
///
/// On any failure: a content file given for a task without a content uuid, an error while persisting the
/// update file, or an error in the write transaction.
pub fn register_raw_task(&self, task: Task, content_file: Option<Vec<u8>>) {
    if let Some(content) = content_file {
        let uuid = task.content_uuid().expect("bad task");
        let (_, mut file) = self.file_store.new_update_with_uuid(uuid.as_u128()).unwrap();
        file.write_all(&content).unwrap();
        file.persist().unwrap();
    }

    let uid = task.uid;
    let mut wtxn = self.env.write_txn().unwrap();

    self.all_tasks.put(&mut wtxn, &BEU32::new(uid), &task).unwrap();

    for index in task.indexes() {
        self.update_index(&mut wtxn, index, |bitmap| {
            bitmap.insert(uid);
        })
        .unwrap();
    }

    self.update_status(&mut wtxn, task.status, |bitmap| {
        bitmap.insert(uid);
    })
    .unwrap();

    self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
        bitmap.insert(uid);
    })
    .unwrap();

    utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, uid).unwrap();

    wtxn.commit().unwrap();
    self.wake_up.signal();
}
/// Create a new index without any associated task.
pub fn create_raw_index(
&self,
@@ -1145,22 +925,20 @@ impl IndexScheduler {
/// 5. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<TickOutcome> {
fn tick(&self) -> Result<usize> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
self.breakpoint(Breakpoint::Start);
}
info!("before getting a new batch");
let batch = match self.get_or_create_next_batch()? {
Some(batch) => batch,
None => return Ok(TickOutcome::WaitForSignal),
};
info!("after getting a new batch");
let index_uid = batch.index_uid().map(ToOwned::to_owned);
// TODO cluster: Should we send the starting date as well so everyone is in sync?
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(0),
};
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
let mut ids = batch.ids();
@@ -1230,23 +1008,7 @@ impl IndexScheduler {
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
return Ok(0);
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
@@ -1286,64 +1048,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
}
/// If there is no cluster or if leader -> create a new batch
/// If follower -> wait till the leader gives us a batch to process
fn get_or_create_next_batch(&self) -> Result<Option<Batch>> {
info!("inside get or create next batch");
let batch = match &self.cluster {
None | Some(Cluster::Leader(_)) => {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))?
}
Some(Cluster::Follower(follower)) => {
let batch = follower.get_new_batch();
Some(self.get_batch_from_cluster_batch(batch)?)
}
};
if let Some(Cluster::Leader(leader)) = &self.cluster {
// first, onboard the new followers
if leader.has_new_followers() {
info!("New followers are trying to join the cluster");
let started_at = OffsetDateTime::now_utc();
let dump = self
.create_dump(
&Task {
uid: TaskId::MAX,
enqueued_at: started_at,
started_at: Some(started_at),
finished_at: Some(started_at),
error: None,
canceled_by: None,
details: None,
status: Status::Enqueued,
kind: KindWithContent::DumpCreation {
keys: leader.get_keys(),
// TODO cluster: should we unify the instance_uid between every instances?
instance_uid: None,
},
},
&started_at,
)
.unwrap();
let mut buffer = Vec::new();
// TODO cluster: stop writing everything in RAM
dump.persist_to(&mut buffer).unwrap();
leader.join_me(buffer);
}
// second, starts processing the batch
if let Some(ref batch) = batch {
leader.starts_batch(batch.clone().into());
}
}
Ok(batch)
Ok(processed_tasks)
}
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
@@ -1378,26 +1083,6 @@ impl IndexScheduler {
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.
///
/// It tells the caller driving the scheduler whether to call `tick` again right away
/// or to wait first.
pub enum TickOutcome {
/// The scheduler should immediately attempt another `tick`.
///
/// The `usize` field contains the number of processed tasks.
TickAgain(usize),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
}
/// How many indexes we can afford to have open simultaneously.
///
/// NOTE(review): the sizes below are presumably expressed in bytes — confirm against the code computing the budget.
struct IndexBudget {
/// Map size of an index.
map_size: usize,
/// Maximum number of simultaneously opened indexes.
index_count: usize,
/// For very constrained systems we might need to reduce the base task_db_size so we can accept at least one index.
task_db_size: usize,
}
#[cfg(test)]
mod tests {
use std::io::{BufWriter, Seek, Write};
@@ -1443,8 +1128,6 @@ mod tests {
let tempdir = TempDir::new().unwrap();
let (sender, receiver) = crossbeam::channel::bounded(0);
let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
let options = IndexSchedulerOptions {
version_file_path: tempdir.path().join(VERSION_FILE_NAME),
auth_path: tempdir.path().join("auth"),
@@ -1454,15 +1137,12 @@ mod tests {
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
index_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
indexer_config: IndexerConfig::default(),
autobatching_enabled,
};
let index_scheduler =
Self::new(options, None, Consistency::default(), sender, planned_failures).unwrap();
let index_scheduler = Self::new(options, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state.
@@ -2000,105 +1680,6 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
}
#[test]
/// Registers a document addition followed by a document deletion on the same index,
/// processes them in one successful batch, and snapshots the documents left in the index.
fn document_addition_and_document_deletion() {
// Autobatching is enabled (first argument) and no failure is planned.
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
// Write the payload to an update file so the addition task can reference it by uuid.
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
// Ask for the deletion of two of the three documents that the first task adds.
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
// Read every document back from the index and snapshot them as pretty-printed JSON.
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
/// Registers a document deletion on an index that does not exist yet, followed by a document
/// addition on the same index: the deletion batch is expected to fail, then the addition batch
/// is expected to succeed, and the resulting documents are snapshotted.
fn document_deletion_and_document_addition() {
// Autobatching is enabled (first argument) and no failure is planned.
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
// Write the payload to an update file so the addition task can reference it by uuid.
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
// The deletion should have failed because it can't create an index
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
// The addition should work
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
// Read every document back from the index and snapshot them as pretty-printed JSON.
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

View File

@@ -1,203 +0,0 @@
//! Thread-safe `Vec`-backed LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
use std::sync::atomic::{AtomicU64, Ordering};
/// Thread-safe LRU cache storing its entries in a `Vec`.
///
/// Every entry is stamped with the generation at which it was inserted; the entry with the
/// smallest generation is the one evicted first.
#[derive(Debug)]
pub struct Lru<T> {
    /// The cached values, each paired with its generation stamp.
    data: Vec<(AtomicU64, T)>,
    /// The last generation that was handed out.
    generation: AtomicU64,
    /// The maximum number of entries `data` is allowed to hold.
    cap: usize,
}

impl<T> Lru<T> {
    /// Builds an empty LRU cache able to hold `cap` values.
    ///
    /// The storage is allocated up-front, and a [`Self::put`] operation never grows it.
    ///
    /// # Panics
    ///
    /// - If the capacity is 0.
    /// - If the capacity exceeds `isize::MAX` bytes.
    pub fn new(cap: usize) -> Self {
        assert_ne!(cap, 0, "The capacity of a cache cannot be 0");
        // Note: an entry contains an AtomicU64, so it is never zero-sized and the allocated
        // capacity will never be usize::MAX.
        let data = Vec::with_capacity(cap);
        Self { data, generation: AtomicU64::new(0), cap }
    }

    /// The maximum number of elements the cache can hold before it starts evicting.
    ///
    /// The cache will contain at most this number of elements at any given time.
    pub fn capacity(&self) -> usize {
        self.cap
    }

    /// Hands out a fresh generation through a shared reference.
    fn next_generation(&self) -> u64 {
        // Acquire so this "happens-before" any potential store to a data cell (with Release ordering)
        self.generation.fetch_add(1, Ordering::Acquire) + 1
    }

    /// Hands out a fresh generation through an exclusive reference, without atomic instructions.
    fn next_generation_mut(&mut self) -> u64 {
        let counter = self.generation.get_mut();
        *counter += 1;
        *counter
    }

    /// Stores `value` in the cache, making room for it first if the cache is full.
    ///
    /// Returns the value that was evicted to make room, if any.
    ///
    /// # Complexity
    ///
    /// - If the cache is full, then linear in the capacity.
    /// - Otherwise constant.
    pub fn put(&mut self, value: T) -> Option<T> {
        // No memory fence here: exclusive access means whichever mechanism synchronizes
        // callers (very probably, a RwLock) is assumed to take care of fencing.
        let stamp = self.next_generation_mut();
        let evicted = self.is_full().then(|| self.pop()).flatten();
        self.data.push((AtomicU64::new(stamp), value));
        evicted
    }

    /// Removes and returns the value with the smallest generation.
    ///
    /// Returns `None` when the cache is empty.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn pop(&mut self) -> Option<T> {
        // Iterate with exclusive references so that the `AtomicU64`s can be read as plain
        // integers through `get_mut`, without atomic instructions.
        let (oldest_index, _) = self
            .data
            .iter_mut()
            .enumerate()
            .map(|(index, (generation, _))| (index, *generation.get_mut()))
            .min_by_key(|&(_, generation)| generation)?;
        Some(self.data.swap_remove(oldest_index).1)
    }

    /// The number of elements currently stored in the cache.
    ///
    /// This value is guaranteed to be less than or equal to [`Self::capacity`].
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if putting one more element in the cache would evict an existing one.
    pub fn is_full(&self) -> bool {
        self.capacity() == self.len()
    }
}
/// A key-value map on top of [`Lru`]: entries are `(key, value)` pairs looked up by scanning for the key.
pub struct LruMap<K, V>(Lru<(K, V)>);

impl<K, V> LruMap<K, V>
where
    K: Eq,
{
    /// Builds an empty LRU cache map able to hold `cap` entries.
    ///
    /// The storage is allocated up-front, and a [`Self::insert`] operation never grows it.
    ///
    /// # Panics
    ///
    /// - If the capacity is 0.
    /// - If the capacity exceeds `isize::MAX` bytes.
    pub fn new(cap: usize) -> Self {
        Self(Lru::new(cap))
    }

    /// Looks up the value associated with `key`, refreshing the generation of the entry on a hit.
    ///
    /// Returns `None` if no entry has this key.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn get(&self, key: &K) -> Option<&V> {
        let (generation, (_, value)) =
            self.0.data.iter().find(|(_, (candidate, _))| key == candidate)?;
        // A new generation is only drawn on a hit, and published with Release ordering.
        generation.store(self.0.next_generation(), Ordering::Release);
        Some(value)
    }

    /// Looks up the value associated with `key` for mutation, refreshing the generation of the entry on a hit.
    ///
    /// Returns `None` if no entry has this key.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
        // The generation counter is bumped before the scan, whether or not the key is found.
        let stamp = self.0.next_generation_mut();
        self.0.data.iter_mut().find(|(_, (candidate, _))| key == candidate).map(
            |(generation, (_, value))| {
                *generation.get_mut() = stamp;
                value
            },
        )
    }

    /// Associates `value` with `key`, replacing the value of an existing entry or adding a new entry.
    ///
    /// The returned [`InsertionOutcome`] carries the replaced value, or the evicted entry if
    /// adding the new entry pushed another one out of the cache.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn insert(&mut self, key: K, value: V) -> InsertionOutcome<K, V> {
        if let Some(slot) = self.get_mut(&key) {
            return InsertionOutcome::Replaced(std::mem::replace(slot, value));
        }
        match self.0.put((key, value)) {
            None => InsertionOutcome::InsertedNew,
            Some((evicted_key, evicted_value)) => {
                InsertionOutcome::Evicted(evicted_key, evicted_value)
            }
        }
    }

    /// Takes the entry with this `key` out of the cache map and returns its value.
    ///
    /// Returns `None` if there was no element with this key in the cache.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn remove(&mut self, key: &K) -> Option<V> {
        let index = self.0.data.iter().position(|(_, (candidate, _))| key == candidate)?;
        let (_, (_, value)) = self.0.data.swap_remove(index);
        Some(value)
    }
}
/// The result of an insertion in a LRU map.
pub enum InsertionOutcome<K, V> {
/// The key was not in the cache, the key-value pair has been inserted.
InsertedNew,
/// The key was not in the cache and an old key-value pair was evicted from the cache to make room for its insertion.
///
/// Carries the evicted key and value.
Evicted(K, V),
/// The key was already in the cache map, its value has been updated.
///
/// Carries the value that was previously associated with the key.
Replaced(V),
}

View File

@@ -1,42 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,9 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -1,37 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,17 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 1,
"doggo": "jean bob"
},
{
"id": 2,
"catto": "jorts"
},
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -439,29 +439,20 @@ impl IndexScheduler {
provided_ids: received_document_ids,
deleted_documents,
} => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, documents_ids) =
if let KindWithContent::DocumentDeletion {
ref index_uid,
ref documents_ids,
} = kind
{
(index_uid, documents_ids)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
if let Some(deleted_documents) = deleted_documents {
assert_eq!(status, Status::Succeeded);
assert!(deleted_documents <= received_document_ids as u64);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.unwrap() <= received_document_ids as u64);
assert!(documents_ids.len() == received_document_ids);
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
assert!(documents_ids.len() == received_document_ids);
match &kind {
KindWithContent::DocumentDeletion { index_uid, documents_ids } => {
assert_eq!(&task_index_uid.unwrap(), index_uid);
assert!(documents_ids.len() >= received_document_ids);
}
_ => panic!(),
}
} else {
assert_ne!(status, Status::Succeeded);
}
}
Details::ClearAll { deleted_documents } => {
@@ -538,37 +529,3 @@ impl IndexScheduler {
}
}
}
/// Searches for the biggest value accepted by `is_good`, probing `start_point` first.
///
/// While only good values have been seen the candidate is doubled; while only bad values have
/// been seen it is halved. Once both a good and a bad value are known, the search bisects
/// between the biggest known good value and the smallest known bad value.
///
/// `is_good` is called once per probed candidate. The bisection only converges to the exact
/// boundary if `is_good` is monotonic, i.e. good up to some value and bad above it.
///
/// # Returns
///
/// - `0` if halving (or doubling from `0`) brings the candidate to `0`; note that `0` itself
///   is then returned without being probed, unless it was the start point.
/// - `usize::MAX` if every probed value up to and including `usize::MAX` is good.
/// - Otherwise the biggest known good value once it is adjacent to the smallest known bad one.
///
/// All arithmetic is overflow-free: doubling saturates at `usize::MAX` and midpoints are
/// computed without summing the two bounds.
pub fn dichotomic_search(start_point: usize, mut is_good: impl FnMut(usize) -> bool) -> usize {
    /// Midpoint of `a` and `b`, rounded down, computed without overflowing.
    fn midpoint(a: usize, b: usize) -> usize {
        let (low, high) = if a <= b { (a, b) } else { (b, a) };
        low + (high - low) / 2
    }

    let mut biggest_good = None;
    let mut smallest_bad = None;
    let mut current = start_point;
    loop {
        let is_good = is_good(current);

        (biggest_good, smallest_bad, current) = match (biggest_good, smallest_bad, is_good) {
            // No good value known yet: keep halving.
            (None, _, false) => (None, Some(current), current / 2),
            // No bad value known yet: keep doubling.
            (_, None, true) => {
                if current == usize::MAX {
                    // Nothing bigger can be probed: everything seen so far is good.
                    return current;
                }
                // Saturate instead of overflowing so that `usize::MAX` gets probed last.
                (Some(current), None, current.saturating_mul(2))
            }
            // A new biggest good value: bisect between it and the smallest known bad value.
            (_, Some(smallest_bad), true) => {
                (Some(current), Some(smallest_bad), midpoint(current, smallest_bad))
            }
            // A new smallest bad value: bisect between the biggest known good value and it.
            (Some(biggest_good), _, false) => {
                (Some(biggest_good), Some(current), midpoint(biggest_good, current))
            }
        };

        if current == 0 {
            return current;
        }
        // The next candidate fell back onto the biggest good value: the bounds are adjacent.
        if smallest_bad.is_some() && biggest_good.is_some() && biggest_good >= Some(current) {
            return current;
        }
    }
}

View File

@@ -12,7 +12,6 @@ license.workspace = true
[dependencies]
base64 = "0.13.1"
cluster = { path = "../cluster" }
enum-iterator = "1.1.3"
hmac = "0.12.1"
maplit = "1.0.2"

View File

@@ -6,7 +6,6 @@ use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;
use cluster::Cluster;
use error::{AuthControllerError, Result};
use maplit::hashset;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
@@ -22,52 +21,17 @@ use uuid::Uuid;
pub struct AuthController {
store: Arc<HeedAuthStore>,
master_key: Option<String>,
cluster: Option<Cluster>,
}
impl AuthController {
pub fn new(
db_path: impl AsRef<Path>,
master_key: &Option<String>,
cluster: Option<Cluster>,
) -> Result<Self> {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?;
if store.is_empty()? {
generate_default_keys(&store)?;
}
let this = Self {
store: Arc::new(store),
master_key: master_key.clone(),
cluster: cluster.clone(),
};
if let Some(Cluster::Follower(follower)) = cluster {
let this = this.clone();
std::thread::spawn(move || loop {
match follower.api_key_operation() {
cluster::ApiKeyOperation::Insert(key) => {
this.store.put_api_key(key).expect("Inconsistency with the leader");
}
cluster::ApiKeyOperation::Delete(uuid) => {
this.store.delete_api_key(uuid).expect("Inconsistency with the leader");
}
}
});
} else if let Some(Cluster::Leader(leader)) = cluster {
let this = this.clone();
std::thread::spawn(move || loop {
let channel = leader.needs_keys();
let keys = this.list_keys().expect("auth controller is dead");
channel.send(keys).expect("Cluster is dead");
});
}
Ok(this)
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
}
/// Return the size of the `AuthController` database in bytes.
@@ -78,13 +42,7 @@ impl AuthController {
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
match self.store.get_api_key(create_key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
None => {
let key = self.store.put_api_key(create_key.to_key())?;
if let Some(Cluster::Leader(ref leader)) = self.cluster {
leader.insert_key(key.clone());
}
Ok(key)
}
None => self.store.put_api_key(create_key.to_key()),
}
}
@@ -99,12 +57,7 @@ impl AuthController {
name => key.name = name.set(),
};
key.updated_at = OffsetDateTime::now_utc();
let key = self.store.put_api_key(key)?;
if let Some(Cluster::Leader(ref leader)) = self.cluster {
leader.insert_key(key.clone());
}
Ok(key)
self.store.put_api_key(key)
}
pub fn get_key(&self, uid: Uuid) -> Result<Key> {
@@ -147,9 +100,6 @@ impl AuthController {
pub fn delete_key(&self, uid: Uuid) -> Result<()> {
if self.store.delete_api_key(uid)? {
if let Some(Cluster::Leader(ref leader)) = self.cluster {
leader.delete_key(uid);
}
Ok(())
} else {
Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
@@ -223,8 +173,8 @@ impl Default for AuthFilter {
impl AuthFilter {
#[inline]
pub fn allow_index_creation(&self, index: &str) -> bool {
self.allow_index_creation && self.is_index_authorized(index)
pub fn allow_index_creation(&self) -> bool {
self.allow_index_creation
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {

View File

@@ -15,7 +15,7 @@ actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
convert_case = "0.6.0"
csv = "1.1.6"
deserr = "0.5.0"
deserr = "0.4.1"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }

View File

@@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
pub enum PayloadType {
Ndjson,
Json,
Csv { delimiter: u8 },
Csv,
}
impl fmt::Display for PayloadType {
@@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
match self {
PayloadType::Ndjson => f.write_str("ndjson"),
PayloadType::Json => f.write_str("json"),
PayloadType::Csv { .. } => f.write_str("csv"),
PayloadType::Csv => f.write_str("csv"),
}
}
}
@@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let csv = csv::Reader::from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;

View File

@@ -11,8 +11,8 @@ use serde::{Deserialize, Serialize};
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
#[serde(skip)]
pub code: StatusCode,
pub message: String,
code: StatusCode,
message: String,
#[serde(rename = "code")]
error_code: String,
#[serde(rename = "type")]
@@ -212,7 +212,6 @@ InvalidApiKeyName , InvalidRequest , BAD_REQUEST ;
InvalidApiKeyOffset , InvalidRequest , BAD_REQUEST ;
InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;

View File

@@ -46,7 +46,7 @@ pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
pub enum VersionFileError {
#[error(
"Meilisearch (v{}) failed to infer the version of the database.
To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
env!("CARGO_PKG_VERSION").to_string()
)]
MissingVersionFile,
@@ -54,7 +54,7 @@ pub enum VersionFileError {
MalformedVersionFile,
#[error(
"Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
env!("CARGO_PKG_VERSION").to_string()
)]
VersionMismatch { major: String, minor: String, patch: String },

View File

@@ -24,9 +24,8 @@ bstr = "1.0.1"
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
cluster = { path = "../cluster" }
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.4.1"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
@@ -53,7 +52,7 @@ parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.2", features = ["process"] }
prometheus = { version = "0.13.2", features = ["process"], optional = true }
rand = "0.8.5"
rayon = "1.5.3"
regex = "1.6.0"
@@ -108,6 +107,7 @@ zip = { version = "0.6.2", optional = true }
[features]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
metrics = ["prometheus"]
analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"]
@@ -116,5 +116,5 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.6/build.zip"
sha1 = "dce0aba16bceab5549edf9f01de89858800f7422"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.5/build.zip"
sha1 = "6fe959b78511b32e9ff857fd9fd31740633b9fce"

View File

@@ -224,7 +224,6 @@ impl super::Analytics for SegmentAnalytics {
#[derive(Debug, Clone, Serialize)]
struct Infos {
env: String,
experimental_enable_metrics: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@@ -257,7 +256,6 @@ impl From<Opt> for Infos {
// Thus we must not insert `..` at the end.
let Opt {
db_path,
experimental_enable_metrics,
http_addr,
master_key: _,
env,
@@ -282,7 +280,6 @@ impl From<Opt> for Infos {
dump_dir,
log_level,
indexer_options,
cluster_configuration: _,
config_file_path,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: _,
@@ -293,14 +290,12 @@ impl From<Opt> for Infos {
ScheduleSnapshot::Enabled(interval) => Some(interval),
};
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
indexer_options;
let IndexerOpts { max_indexing_memory, max_indexing_threads } = indexer_options;
// We're going to override every piece of sensitive information.
// We consider information sensitive if it contains a path, an address, or a key.
Self {
env,
experimental_enable_metrics,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
@@ -501,7 +496,6 @@ pub struct SearchAggregator {
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
@@ -569,7 +563,6 @@ impl SearchAggregator {
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
@@ -629,7 +622,6 @@ impl SearchAggregator {
// filter
self.filter_with_geo_radius |= other.filter_with_geo_radius;
self.filter_with_geo_bounding_box |= other.filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria = self
@@ -697,7 +689,6 @@ impl SearchAggregator {
},
"filter": {
"with_geoRadius": self.filter_with_geo_radius,
"with_geoBoundingBox": self.filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64),
"most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},

View File

@@ -11,8 +11,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@@ -54,7 +52,6 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,

View File

@@ -4,15 +4,17 @@ pub mod error;
pub mod analytics;
#[macro_use]
pub mod extractors;
pub mod metrics;
pub mod middleware;
pub mod option;
pub mod routes;
pub mod search;
#[cfg(feature = "metrics")]
pub mod metrics;
#[cfg(feature = "metrics")]
pub mod route_metrics;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::net::ToSocketAddrs;
use std::io::{BufReader, BufWriter};
use std::path::Path;
use std::sync::Arc;
use std::thread;
@@ -23,15 +25,14 @@ use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use actix_web::{middleware, web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use cluster::{Cluster, Follower, Leader};
use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::{error, info};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@@ -44,34 +45,6 @@ use option::ScheduleSnapshot;
use crate::error::MeilisearchHttpError;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// Lower for Windows that dedicates a smaller virtual address space to processes.
///
/// The value was chosen this way:
///
/// - Windows provides a small virtual address space of about 10TiB to processes.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(windows)]
const DEFAULT_INDEX_COUNT: usize = 4;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// The higher, the better for avoiding reopening indexes.
///
/// The value was chosen this way:
///
/// - Opening an index consumes a file descriptor.
/// - The default on many unices is about 256 file descriptors for a process.
/// - 100 is a little bit less than half this value.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(not(windows))]
const DEFAULT_INDEX_COUNT: usize = 20;
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.
@@ -113,13 +86,13 @@ pub fn create_app(
analytics.clone(),
)
})
.configure(|cfg| routes::configure(cfg, opt.experimental_enable_metrics))
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
#[cfg(feature = "metrics")]
let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
#[cfg(feature = "metrics")]
let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics));
app.wrap(
Cors::default()
.send_wildcard()
@@ -128,9 +101,9 @@ pub fn create_app(
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
}
enum OnFailure {
@@ -145,7 +118,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => open_or_create_database_unchecked(opt, None, OnFailure::RemoveDb)?,
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
@@ -162,14 +135,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, empty_db, None)?
open_or_create_database(opt, empty_db)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
open_or_create_database_unchecked(opt, None, OnFailure::RemoveDb)?;
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
@@ -189,62 +162,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, empty_db, None)?
}
} else if let Some(ref cluster) = opt.cluster_configuration.experimental_enable_ha {
match cluster.as_str() {
"leader" => {
info!("Starting as a leader");
let mut addr = opt.http_addr.to_socket_addrs().unwrap().next().unwrap();
addr.set_port(6666);
open_or_create_database(
opt,
empty_db,
Some(Cluster::Leader(Leader::new(addr, opt.master_key.clone()))),
)?
}
"follower" => {
info!("Starting as a follower");
if !empty_db {
panic!("Can't start as a follower with an already existing data.ms");
}
let mut addr = opt
.cluster_configuration
.leader
.as_ref()
.expect("Can't be a follower without a leader")
.to_socket_addrs()
.unwrap()
.next()
.unwrap();
addr.set_port(6666);
let (follower, dump) = Follower::join(addr, opt.master_key.clone());
let mut dump_file = tempfile::NamedTempFile::new().unwrap();
dump_file.write_all(&dump).unwrap();
let (mut index_scheduler, mut auth_controller) = open_or_create_database_unchecked(
opt,
Some(Cluster::Follower(follower)),
OnFailure::RemoveDb,
)?;
match import_dump(
&opt.db_path,
dump_file.path(),
&mut index_scheduler,
&mut auth_controller,
) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
}
_ => panic!("Available values for the cluster mode are leader and follower"),
open_or_create_database(opt, empty_db)?
}
} else {
open_or_create_database(opt, empty_db, None)?
open_or_create_database(opt, empty_db)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -269,34 +190,25 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
opt: &Opt,
cluster: Option<Cluster>,
on_failure: OnFailure,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, cluster.clone());
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(
IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes()
as usize,
index_count: DEFAULT_INDEX_COUNT,
},
cluster,
opt.cluster_configuration.consistency,
)?)
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
})?)
};
match (
@@ -318,13 +230,12 @@ fn open_or_create_database_unchecked(
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
cluster: Option<Cluster>,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, cluster, OnFailure::KeepDb)
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
}
fn import_dump(
@@ -508,6 +419,15 @@ pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
/// Conditionally registers the `/metrics` route on the given service configuration.
///
/// When `enable_metrics_route` is `true`, a `GET /metrics` resource is added whose handler is
/// `crate::route_metrics::get_metrics`. When it is `false`, `config` is left untouched.
///
/// This function is only compiled when the `metrics` cargo feature is enabled.
#[cfg(feature = "metrics")]
pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_route: bool) {
if enable_metrics_route {
// Only `GET` is routed to the handler on this resource.
config.service(
web::resource("/metrics").route(web::get().to(crate::route_metrics::get_metrics)),
);
}
}
/// Parses the output of
/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
/// as a prototype name.

View File

@@ -12,7 +12,6 @@ use std::{env, fmt, fs};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use cluster::Consistency;
use meilisearch_types::milli::update::IndexerConfig;
use rustls::server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
@@ -48,7 +47,8 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
#[cfg(feature = "metrics")]
const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -65,11 +65,11 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DEFAULT_LOG_EVERY_N: usize = 100_000;
// Each environment (index and task-db) is taking space in the virtual address space.
// Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
// opened simultaneously.
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
//
// The size of the virtual address space is limited by the OS. About 100TB for Linux and about 10TB for Windows.
// This means that the number of indexes is limited to about 200 for Linux and about 20 for Windows.
pub const INDEX_SIZE: u64 = 536_870_912_000; // 500 GiB
pub const TASK_DB_SIZE: u64 = 10_737_418_240; // 10 GiB
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
@@ -287,21 +287,16 @@ pub struct Opt {
#[serde(default)]
pub log_level: LogLevel,
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
///
/// Enables the Prometheus metrics on the `GET /metrics` endpoint.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_METRICS)]
/// Enables Prometheus metrics and /metrics route.
#[cfg(feature = "metrics")]
#[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
#[serde(default)]
pub experimental_enable_metrics: bool,
pub enable_metrics_route: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
#[serde(flatten)]
#[clap(flatten)]
pub cluster_configuration: ClusterOpts,
/// Set the path to a configuration file that should be used to setup the engine.
/// Format must be TOML.
#[clap(long)]
@@ -389,8 +384,8 @@ impl Opt {
config_file_path: _,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
experimental_enable_metrics: enable_metrics_route,
cluster_configuration: _,
#[cfg(feature = "metrics")]
enable_metrics_route,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -428,10 +423,13 @@ impl Opt {
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_METRICS,
enable_metrics_route.to_string(),
);
#[cfg(feature = "metrics")]
{
export_to_env_if_not_present(
MEILI_ENABLE_METRICS_ROUTE,
enable_metrics_route.to_string(),
);
}
indexer_options.export_to_env();
}
@@ -496,21 +494,12 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
#[serde(default)]
pub max_indexing_threads: MaxThreads,
/// Whether or not we want to determine the budget of virtual memory address space we have available dynamically
/// (the default), or statically.
///
/// Determining the budget of virtual memory address space dynamically takes some time on some systems (such as macOS)
/// and may make tests non-deterministic, so we want to skip it in tests.
#[clap(skip)]
#[serde(skip)]
pub skip_index_budget: bool,
}
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
let IndexerOpts { max_indexing_memory, max_indexing_threads } = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
@@ -524,21 +513,6 @@ impl IndexerOpts {
}
}
// Command-line / config-file options for the experimental cluster mode.
//
// NOTE: plain `//` comments are used on purpose; `///` doc comments on a clap-derived
// struct are picked up as CLI help text and would change the program's output.
#[derive(Debug, Default, Clone, Parser, Deserialize)]
pub struct ClusterOpts {
// Cluster role of this instance. `setup_meilisearch` matches this value against
// "leader" and "follower" and panics on any other string.
#[clap(long)]
#[serde(default)]
pub experimental_enable_ha: Option<String>,
// Address of the leader to join. `setup_meilisearch` requires it when starting as a
// follower; it is resolved with `to_socket_addrs` and its port is then overridden to 6666.
#[clap(long)]
#[serde(default)]
pub leader: Option<String>,
// Consistency setting forwarded to `IndexScheduler::new`.
// NOTE(review): the exact semantics live in the `cluster` crate — confirm there.
#[clap(long, default_value_t)]
#[serde(default)]
pub consistency: Consistency,
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
@@ -553,7 +527,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
})
}

View File

@@ -1,11 +1,40 @@
//! Contains all the custom middleware used in meilisearch
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::Error;
use actix_web::http::header;
use actix_web::{Error, HttpResponse};
use futures_util::future::LocalBoxFuture;
use prometheus::HistogramTimer;
use meilisearch_auth::actions;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use prometheus::{Encoder, HistogramTimer, TextEncoder};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
/// HTTP handler that refreshes the Meilisearch gauges and returns all gathered Prometheus
/// metrics as plain text.
///
/// The handler is guarded by `ActionPolicy<{ actions::METRICS_GET }>`
/// (presumably this rejects keys that lack the metrics action — verify in the extractor).
///
/// # Errors
///
/// Returns a `ResponseError` when `get_all_stats` fails.
///
/// # Panics
///
/// Panics if the metrics cannot be encoded, or if the encoded buffer is not valid UTF-8.
pub async fn get_metrics(
meilisearch: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
// Stats are computed with the search rules attached to the authenticated key.
let search_rules = &meilisearch.filters().search_rules;
let response = meilisearch.get_all_stats(search_rules).await?;
// Refresh the gauges from the freshly computed stats before gathering.
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
// One labelled sample per index, holding its document count.
for (index, value) in response.indexes.iter() {
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
.with_label_values(&[index])
.set(value.number_of_documents as i64);
}
// Serialize everything returned by `prometheus::gather()` with the text encoder.
let encoder = TextEncoder::new();
let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
}
pub struct RouteMetrics;

View File

@@ -10,10 +10,10 @@ use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
@@ -156,31 +156,16 @@ pub async fn get_all_documents(
}
#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
pub primary_key: Option<String>,
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
}
/// Converts the `csvDelimiter` query parameter from a `char` into a single byte.
///
/// Used as the `try_from(char)` conversion of `UpdateDocumentsQuery::csv_delimiter`.
///
/// # Errors
///
/// Returns a `DeserrQueryParamError` carrying `Code::InvalidDocumentCsvDelimiter` when `c`
/// is not an ASCII character.
fn from_char_csv_delimiter(
c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidDocumentCsvDelimiter>> {
if c.is_ascii() {
// The cast is lossless here: an ASCII `char` always fits in a `u8`.
Ok(Some(c as u8))
} else {
Err(DeserrQueryParamError::new(
format!("csv delimiter must be an ascii character. Found: `{}`", c),
Code::InvalidDocumentCsvDelimiter,
))
}
}
pub async fn replace_documents(
pub async fn add_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -192,13 +177,12 @@ pub async fn replace_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
@@ -211,7 +195,7 @@ pub async fn replace_documents(
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -219,17 +203,15 @@ pub async fn update_documents(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
params.into_inner().primary_key,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
@@ -239,43 +221,26 @@ pub async fn update_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
) {
(Some(("application", "json")), None) => PayloadType::Json,
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
(Some(("application", "json")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/json",
)))
}
(Some(("application", "x-ndjson")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/x-ndjson",
)))
}
(Some((type_, subtype)), _) => {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
(None, _) => {
None => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
@@ -320,9 +285,7 @@ async fn document_addition(
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` in another task.

View File

@@ -61,8 +61,6 @@ pub struct IndexView {
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
// It is important that this function does not keep the Index handle or a clone of it, because
// `list_indexes` relies on this property to avoid opening all indexes at once.
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,
@@ -92,15 +90,13 @@ pub async fn list_indexes(
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(IndexView::new(uid.to_string(), index)?))
})?;
// This won't open all indexes at once because `IndexView` doesn't keep the `Index` open.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes
.into_iter()
.filter(|(name, _)| filters.is_index_authorized(name))
.map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?;
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!("returns: {:?}", ret);
@@ -124,7 +120,8 @@ pub async fn create_index(
) -> Result<HttpResponse, ResponseError> {
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
// FIXME: allow_index_creation?
let allow_index_creation = index_scheduler.filters().is_index_authorized(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),

View File

@@ -45,8 +45,7 @@ macro_rules! make_setting_route {
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@@ -87,8 +86,7 @@ macro_rules! make_setting_route {
..Default::default()
};
let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@@ -562,7 +560,7 @@ pub async fn update_all(
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
@@ -598,7 +596,7 @@ pub async fn delete_all(
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,

View File

@@ -1,50 +0,0 @@
use actix_web::http::header;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use prometheus::{Encoder, TextEncoder};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
/// Registers the metrics endpoint on the given service configuration.
///
/// A single resource is mounted at the empty path of the enclosing scope; `GET` requests on it
/// are dispatched to [`get_metrics`].
pub fn configure(config: &mut web::ServiceConfig) {
    let metrics_resource = web::resource("").route(web::get().to(get_metrics));
    config.service(metrics_resource);
}
/// Handler of the metrics route: refreshes the database and index gauges, then returns every
/// gathered Prometheus metric encoded as plain text.
///
/// # Errors
///
/// Returns an `InvalidToken` authentication error (with an extra explanatory sentence appended
/// to its message) when the filters of the key do not authorize all indexes, and forwards any
/// error raised by `create_all_stats`.
///
/// # Panics
///
/// Panics if the metrics cannot be encoded or if the encoded bytes are not valid UTF-8.
pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
) -> Result<HttpResponse, ResponseError> {
    let filters = index_scheduler.filters();

    // The stats below span every index, so a key restricted to some indexes is refused.
    if !filters.all_indexes_authorized() {
        let mut denied = ResponseError::from(AuthenticationError::InvalidToken);
        denied
            .message
            .push_str(" The API key for the `/metrics` route must allow access to all indexes.");
        return Err(denied);
    }

    let stats =
        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;

    // Refresh the gauges from the freshly computed stats before gathering.
    crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(stats.database_size as i64);
    crate::metrics::MEILISEARCH_INDEX_COUNT.set(stats.indexes.len() as i64);
    stats.indexes.iter().for_each(|(index, value)| {
        crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
            .with_label_values(&[index])
            .set(value.number_of_documents as i64);
    });

    let mut encoded = Vec::new();
    TextEncoder::new()
        .encode(&prometheus::gather(), &mut encoded)
        .expect("Failed to encode metrics");
    let body = String::from_utf8(encoded).expect("Failed to convert bytes to string");

    Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(body))
}

View File

@@ -22,12 +22,10 @@ const PAGINATION_DEFAULT_LIMIT: usize = 20;
mod api_key;
mod dump;
pub mod indexes;
mod metrics;
mod multi_search;
mod swap_indexes;
pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig, enable_metrics: bool) {
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/keys").configure(api_key::configure))
@@ -37,10 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig, enable_metrics: bool) {
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
if enable_metrics {
cfg.service(web::scope("/metrics").configure(metrics::configure));
}
}
#[derive(Debug, Serialize)]
@@ -266,9 +260,9 @@ pub fn create_all_stats(
)?;
// accumulate the size of each index
let processing_index = processing_task.first().and_then(|task| task.index_uid());
index_scheduler.try_for_each_index(|name, index| {
if !filters.is_index_authorized(name) {
return Ok(());
for (name, index) in index_scheduler.indexes()? {
if !filters.is_index_authorized(&name) {
continue;
}
database_size += index.on_disk_size()?;
@@ -283,9 +277,8 @@ pub fn create_all_stats(
let updated_at = index.updated_at(&rtxn)?;
last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
indexes.insert(name.to_string(), stats);
Ok(())
})?;
indexes.insert(name, stats);
}
database_size += index_scheduler.size()?;
database_size += auth_controller.size()?;
@@ -334,3 +327,5 @@ pub async fn get_health(
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
mod multi_search;

View File

@@ -1,4 +1,3 @@
use actix_http::StatusCode;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
@@ -42,20 +41,17 @@ pub async fn multi_search_with_post(
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = (|| {
let search_results: Result<_, ResponseError> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
for (index_uid, mut query) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query)
{
debug!("multi-search #{query_index}: called with params: {:?}", query);
debug!("search called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
return Err(AuthenticationError::InvalidToken.into());
}
// Apply search rules from tenant token
if let Some(search_rules) =
@@ -64,24 +60,13 @@ pub async fn multi_search_with_post(
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let index = index_scheduler.index(&index_uid)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query))
.await
.with_index(query_index)?;
tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
result: search_result?,
});
}
Ok(search_results)
@@ -94,29 +79,9 @@ pub async fn multi_search_with_post(
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
// of result and we can benefit from static typing.
err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
err
})?;
let search_results = search_results?;
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}
/// Local extension trait for `Result` that removes `map_err` boilerplate when an error has to
/// be tagged with the position of the query that produced it.
trait WithIndex {
    /// The success type carried by the `Result`.
    type T;

    /// Converts the error of the `Result` into a `ResponseError` and pairs it with `index`;
    /// a success value is passed through untouched.
    fn with_index(self, index: usize) -> Result<Self::T, (ResponseError, usize)>;
}

impl<T, E> WithIndex for Result<T, E>
where
    E: Into<ResponseError>,
{
    type T = T;

    fn with_index(self, index: usize) -> Result<T, (ResponseError, usize)> {
        match self {
            Ok(value) => Ok(value),
            Err(source) => Err((source.into(), index)),
        }
    }
}

View File

@@ -204,7 +204,7 @@ pub struct SearchHit {
pub matches_position: Option<MatchesPosition>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<SearchHit>,
@@ -214,11 +214,9 @@ pub struct SearchResult {
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
pub index_uid: String,
@@ -235,12 +233,6 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
/// Pair of bounds reported for a single facet in the `facet_stats` map of a `SearchResult`.
///
/// `perform_search` builds one value per facet name from the `(min, max)` tuples returned by
/// `compute_stats`.
#[derive(Serialize, Debug, Clone, PartialEq)]
pub struct FacetStats {
/// First element of the `(min, max)` tuple computed for the facet.
pub min: f64,
/// Second element of the `(min, max)` tuple computed for the facet.
pub max: f64,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -375,10 +367,9 @@ pub fn perform_search(
&displayed_ids,
);
let mut tokenizer_buidler = TokenizerBuilder::default();
tokenizer_buidler.create_char_map(true);
let tokenizer = TokenizerBuilder::default().build();
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_buidler.build());
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
formatter_builder.crop_marker(query.crop_marker);
formatter_builder.highlight_prefix(query.highlight_pre_tag);
formatter_builder.highlight_suffix(query.highlight_post_tag);
@@ -433,7 +424,7 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
};
let (facet_distribution, facet_stats) = match query.facets {
let facet_distribution = match query.facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
@@ -447,15 +438,11 @@ pub fn perform_search(
facet_distribution.facets(fields);
}
let distribution = facet_distribution.candidates(candidates).execute()?;
let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats))
}
None => (None, None),
};
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
Some(distribution)
}
None => None,
};
let result = SearchResult {
hits: documents,
@@ -463,7 +450,6 @@ pub fn perform_search(
query: query.q.clone().unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution,
facet_stats,
};
Ok(result)
}

View File

@@ -10,7 +10,7 @@ use crate::common::Server;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
Lazy::new(|| {
let authorizations = hashmap! {
let mut authorizations = hashmap! {
("POST", "/multi-search") => hashset!{"search", "*"},
("POST", "/indexes/products/search") => hashset!{"search", "*"},
("GET", "/indexes/products/search") => hashset!{"search", "*"},
@@ -52,7 +52,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/stats") => hashset!{"stats.get", "stats.*", "*"},
("POST", "/dumps") => hashset!{"dumps.create", "dumps.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},
@@ -60,6 +59,10 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/keys") => hashset!{"keys.get", "*"},
};
if cfg!(feature = "metrics") {
authorizations.insert(("GET", "/metrics"), hashset! {"metrics.get", "metrics.*", "*"});
}
authorizations
});
@@ -75,14 +78,6 @@ static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
})
});
/// Error body expected from the `/metrics` route when the API key is restricted to some indexes:
/// the usual `invalid_api_key` payload with an extra sentence about index access in the message.
static INVALID_METRICS_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid. The API key for the `/metrics` route must allow access to all indexes.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
})
});
const MASTER_KEY: &str = "MASTER_KEY";
#[actix_rt::test]
@@ -210,28 +205,15 @@ async fn access_authorized_restricted_index() {
let (response, code) = server.dummy_request(method, route).await;
// The metrics route MUST have no limitation on the indexes
if *route == "/metrics" {
assert_eq!(
response,
INVALID_METRICS_RESPONSE.clone(),
"on route: {:?} - {:?} with action: {:?}",
method,
route,
action
);
assert_eq!(code, 403);
} else {
assert_ne!(
response,
INVALID_RESPONSE.clone(),
"on route: {:?} - {:?} with action: {:?}",
method,
route,
action
);
assert_ne!(code, 403);
}
assert_ne!(
response,
INVALID_RESPONSE.clone(),
"on route: {:?} - {:?} with action: {:?}",
method,
route,
action
);
assert_ne!(code, 403);
}
}
}

View File

@@ -60,7 +60,7 @@ async fn create_api_key_bad_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `o` at 2",
"message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-zA-Z], found `o` at 2",
"code": "invalid_api_key_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_uid"

View File

@@ -107,18 +107,13 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
fn invalid_response(query_index: Option<usize>) -> Value {
let message = if let Some(query_index) = query_index {
format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
} else {
"The provided API key is invalid.".to_string()
};
json!({"message": message,
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
})
}
});
static ACCEPTED_KEYS_SINGLE: Lazy<Vec<Value>> = Lazy::new(|| {
vec![
@@ -379,7 +374,7 @@ macro_rules! compute_authorized_multiple_search {
}
macro_rules! compute_forbidden_single_search {
($tenant_tokens:expr, $parent_keys:expr, $failed_query_indexes:expr) => {
($tenant_tokens:expr, $parent_keys:expr) => {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
let index = server.index("sales");
@@ -402,22 +397,20 @@ macro_rules! compute_forbidden_single_search {
index.wait_task(3).await;
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");
for (key_content, failed_query_indexes) in $parent_keys.iter().zip($failed_query_indexes.into_iter()) {
for key_content in $parent_keys.iter() {
server.use_api_key("MASTER_KEY");
let (response, code) = server.add_api_key(key_content.clone()).await;
assert_eq!(code, 201, "{:?}", response);
let key = response["key"].as_str().unwrap();
let uid = response["uid"].as_str().unwrap();
assert_eq!($tenant_tokens.len(), failed_query_indexes.len(), "tenant_tokens != query_indexes");
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
for tenant_token in $tenant_tokens.iter() {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
assert_eq!(
response,
invalid_response(failed_query_index),
INVALID_RESPONSE.clone(),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
@@ -434,7 +427,7 @@ macro_rules! compute_forbidden_single_search {
}
macro_rules! compute_forbidden_multiple_search {
($tenant_tokens:expr, $parent_keys:expr, $failed_query_indexes:expr) => {
($tenant_tokens:expr, $parent_keys:expr) => {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
let index = server.index("sales");
@@ -457,16 +450,14 @@ macro_rules! compute_forbidden_multiple_search {
index.wait_task(3).await;
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");
for (key_content, failed_query_indexes) in $parent_keys.iter().zip($failed_query_indexes.into_iter()) {
for key_content in $parent_keys.iter() {
server.use_api_key("MASTER_KEY");
let (response, code) = server.add_api_key(key_content.clone()).await;
assert_eq!(code, 201, "{:?}", response);
let key = response["key"].as_str().unwrap();
let uid = response["uid"].as_str().unwrap();
assert_eq!($tenant_tokens.len(), failed_query_indexes.len(), "tenant_token != query_indexes");
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
for tenant_token in $tenant_tokens.iter() {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (response, code) = server.multi_search(json!({"queries" : [
@@ -475,7 +466,7 @@ macro_rules! compute_forbidden_multiple_search {
]})).await;
assert_eq!(
response,
invalid_response(failed_query_index),
INVALID_RESPONSE.clone(),
"{} using tenant_token: {:?} generated with parent_key: {:?}",
response,
tenant_token,
@@ -848,11 +839,7 @@ async fn error_single_search_token_forbidden_parent_key() {
},
];
compute_forbidden_single_search!(
tenant_tokens,
SINGLE_REFUSED_KEYS,
vec![vec![None; 7], vec![None; 7], vec![Some(0); 7], vec![Some(0); 7], vec![Some(0); 7]]
);
compute_forbidden_single_search!(tenant_tokens, SINGLE_REFUSED_KEYS);
}
/// Tests that those Tenant Token are incompatible with the REFUSED_KEYS defined above.
@@ -889,20 +876,7 @@ async fn error_multi_search_token_forbidden_parent_key() {
},
];
compute_forbidden_multiple_search!(
tenant_tokens,
BOTH_REFUSED_KEYS,
vec![
vec![None; 7],
vec![None; 7],
vec![Some(1); 7],
vec![Some(1); 7],
vec![Some(1); 7],
vec![Some(0); 7],
vec![Some(0); 7],
vec![Some(0); 7]
]
);
compute_forbidden_multiple_search!(tenant_tokens, BOTH_REFUSED_KEYS);
}
#[actix_rt::test]
@@ -956,12 +930,7 @@ async fn error_single_search_forbidden_token() {
},
];
let failed_query_indexes: Vec<_> =
std::iter::repeat(Some(0)).take(5).chain(std::iter::repeat(None).take(6)).collect();
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_SINGLE.len()];
compute_forbidden_single_search!(tenant_tokens, ACCEPTED_KEYS_SINGLE, failed_query_indexes);
compute_forbidden_single_search!(tenant_tokens, ACCEPTED_KEYS_SINGLE);
}
#[actix_rt::test]
@@ -1035,15 +1004,7 @@ async fn error_multi_search_forbidden_token() {
},
];
let failed_query_indexes: Vec<_> = std::iter::repeat(Some(0))
.take(5)
.chain(std::iter::repeat(Some(1)).take(5))
.chain(std::iter::repeat(None).take(6))
.collect();
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_BOTH.len()];
compute_forbidden_multiple_search!(tenant_tokens, ACCEPTED_KEYS_BOTH, failed_query_indexes);
compute_forbidden_multiple_search!(tenant_tokens, ACCEPTED_KEYS_BOTH);
}
#[actix_rt::test]
@@ -1076,7 +1037,7 @@ async fn error_access_expired_parent_key() {
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
assert_ne!(response, invalid_response(None));
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
@@ -1085,7 +1046,7 @@ async fn error_access_expired_parent_key() {
let (response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
assert_eq!(response, invalid_response(None));
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -1117,7 +1078,7 @@ async fn error_access_modified_token() {
// test search request while web_token is valid
let (response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
assert_ne!(response, invalid_response(Some(0)));
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
let tenant_token = hashmap! {
@@ -1136,6 +1097,6 @@ async fn error_access_modified_token() {
server.use_api_key(&altered_token);
let (response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
assert_eq!(response, invalid_response(None));
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -30,7 +30,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.json"),
vec![("content-type", "application/json")],
("content-type", "application/json"),
)
.await;
assert_eq!(code, 202);
@@ -46,7 +46,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.ndjson"),
vec![("content-type", "application/x-ndjson")],
("content-type", "application/x-ndjson"),
)
.await;
assert_eq!(code, 202);
@@ -96,21 +96,6 @@ impl Index<'_> {
self.service.post_encoded(url, documents, self.encoder).await
}
/// Posts `payload` verbatim to the documents route of this index.
///
/// `query_parameter` is appended as-is after the path, so it must include its leading `?` when
/// non-empty. When `content_type` is `Some`, it is sent as the only header (`Content-Type`);
/// when it is `None`, the request carries no header at all.
pub async fn raw_add_documents(
    &self,
    payload: &str,
    content_type: Option<&str>,
    query_parameter: &str,
) -> (Value, StatusCode) {
    let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);

    // Zero or one header, depending on whether a content type was requested.
    let headers: Vec<(&str, &str)> =
        content_type.map(|value| ("Content-Type", value)).into_iter().collect();

    self.service.post_str(url, payload, headers).await
}
pub async fn update_documents(
&self,
documents: Value,
@@ -125,21 +110,6 @@ impl Index<'_> {
self.service.put_encoded(url, documents, self.encoder).await
}
/// Puts `payload` verbatim to the documents route of this index.
///
/// `query_parameter` is appended as-is after the path, so it must include its leading `?` when
/// non-empty. When `content_type` is `Some`, it is sent as the only header (`Content-Type`);
/// when it is `None`, the request carries no header at all.
pub async fn raw_update_documents(
    &self,
    payload: &str,
    content_type: Option<&str>,
    query_parameter: &str,
) -> (Value, StatusCode) {
    let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);

    // Zero or one header, depending on whether a content type was requested.
    let headers: Vec<(&str, &str)> =
        content_type.map(|value| ("Content-Type", value)).into_iter().collect();

    self.service.put_str(url, payload, headers).await
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);

View File

@@ -205,10 +205,10 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.
max_indexing_memory: MaxMemory::unlimited(),
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
experimental_enable_metrics: true,
#[cfg(feature = "metrics")]
enable_metrics_route: true,
..Parser::parse_from(None as Option<&str>)
}
}

View File

@@ -34,18 +34,17 @@ impl Service {
self.request(req).await
}
/// Send a test post request from a text body.
/// Send a test post request from a text body, with a `content-type:application/json` header.
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
header: (&str, &str),
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.insert_header(header);
self.request(req).await
}
@@ -58,21 +57,6 @@ impl Service {
self.put_encoded(url, body, Encoder::Plain).await
}
/// Sends a test `PUT` request whose payload is the given text body.
///
/// Every `(name, value)` pair of `headers` is inserted on the request, in order, before it is
/// dispatched; an empty vector sends the request without any extra header.
pub async fn put_str(
    &self,
    url: impl AsRef<str>,
    body: impl AsRef<str>,
    headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
    let bare_request =
        test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string());
    let req = headers
        .into_iter()
        .fold(bare_request, |request, header| request.insert_header(header));
    self.request(req).await
}
pub async fn put_encoded(
&self,
url: impl AsRef<str>,

View File

@@ -216,133 +216,6 @@ async fn add_single_document_with_every_encoding() {
}
}
/// Sends a comma-separated CSV payload with `raw_update_documents` and the `text/csv` content
/// type, then checks that the task is enqueued, succeeds with two indexed documents, and that
/// the stored documents keep the `#id` header as a field name.
#[actix_rt::test]
async fn add_csv_document() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id,name,race
0,jean,bernese mountain
1,jorts,orange cat";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
// Wait for the enqueued task to be processed before inspecting its outcome.
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// Same scenario as a plain CSV upload, but the payload is `|`-separated and the request carries
/// the `?csvDelimiter=|` query parameter; the two documents must be indexed with the same fields.
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id|name|race
0|jean|bernese mountain
1|jorts|orange cat";
let (response, code) =
index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
// Wait for the enqueued task to be processed before inspecting its outcome.
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@@ -1154,53 +1027,6 @@ async fn error_document_field_limit_reached() {
@"");
}
/// With `_geo` declared as a sortable attribute, adds four documents whose `_geo` field is
/// respectively absent, `null`, an object of numeric coordinates and an object of string
/// coordinates, and checks that the addition task succeeds with all four documents indexed.
#[actix_rt::test]
async fn add_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
index.add_documents(documents, None).await;
// The snapshot below shows uid 1 is the document addition task.
let response = index.wait_task(1).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn add_documents_invalid_geo_field() {
let server = Server::new().await;

View File

@@ -1,6 +1,5 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
@@ -98,323 +97,3 @@ async fn delete_documents_batch() {
}
"###);
}
/// Sends an empty body with `raw_add_documents` for each of the JSON, NDJSON and CSV content
/// types and checks that every request is rejected with `400 Bad Request` and the
/// `missing_payload` error code, the message naming the expected payload format.
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
/// Sends an empty body with `raw_update_documents` for each of the JSON, NDJSON and CSV content
/// types and checks that every request is rejected with `400 Bad Request` and the
/// `missing_payload` error code, the message naming the expected payload format.
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
/// Calls `raw_add_documents` without any content type and checks the request is rejected with
/// `415 Unsupported Media Type` and the `missing_content_type` code, both with and without a
/// `csvDelimiter` query parameter.
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
/// Calls `raw_update_documents` without any content type and checks the request is rejected with
/// `415 Unsupported Media Type` and the `missing_content_type` code, both with and without a
/// `csvDelimiter` query parameter.
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
/// Calls `raw_add_documents` with the unknown content type `doggo` and checks the request is
/// rejected with `415 Unsupported Media Type` and the `invalid_content_type` code.
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
/// Updating documents with an unknown `Content-Type` (`doggo`) must be rejected with
/// `415 Unsupported Media Type` and the `invalid_content_type` error code.
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
// The second argument is the Content-Type sent with the request; it is echoed in the error message.
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
/// Adding documents with a malformed `csvDelimiter` query parameter must fail with
/// `400 Bad Request` and the `invalid_document_csv_delimiter` error code.
///
/// Three malformed values are exercised: an empty value, a multi-character string
/// and a non-ASCII character.
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
// Case 1: the parameter is present but has no value.
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
// Case 2: the value is longer than one character.
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
// Case 3: a single non-ASCII character.
// NOTE(review): `encode` is presumably a URL/percent-encoding helper imported elsewhere — confirm.
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
}
/// Updating documents with a malformed `csvDelimiter` query parameter must fail with
/// `400 Bad Request` and the `invalid_document_csv_delimiter` error code.
///
/// Three malformed values are exercised: an empty value, a multi-character string
/// and a non-ASCII character.
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
// Case 1: the parameter is present but has no value.
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
// Case 2: the value is longer than one character.
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
// Case 3: a single non-ASCII character.
// NOTE(review): `encode` is presumably a URL/percent-encoding helper imported elsewhere — confirm.
let (response, code) = index
.raw_update_documents(
"",
Some("application/json"),
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_document_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
}
"###);
}
/// Adding documents with a well-formed `csvDelimiter` but a non-CSV `Content-Type`
/// must fail with `415 Unsupported Media Type` and the `invalid_content_type` error code.
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
// JSON payloads cannot be combined with a csv delimiter.
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
// Same expectation for NDJSON payloads.
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
/// Updating documents with a well-formed `csvDelimiter` but a non-CSV `Content-Type`
/// must fail with `415 Unsupported Media Type` and the `invalid_content_type` error code.
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
// JSON payloads cannot be combined with a csv delimiter.
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
// Same expectation for NDJSON payloads.
let (response, code) =
index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}

View File

@@ -442,37 +442,3 @@ async fn displayedattr_2_smol() {
)
.await;
}
/// Checks that `attributesToHighlight` wraps matched CJK terms in `<em>` tags.
///
/// Two documents are indexed (one Japanese title, one Chinese title) and each is
/// queried with a term taken from its own title; the `_formatted.title` of the first
/// hit must contain the expected highlights.
#[cfg(feature = "default")]
#[actix_rt::test]
async fn test_cjk_highlight() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = json!([
        { "id": 0, "title": "この度、クーポンで無料で頂きました。" },
        { "id": 1, "title": "大卫到了扫罗那里" },
    ]);
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    // The query has to be `で`: the assertion below expects both occurrences of that
    // particle to be highlighted, which an empty query cannot produce.
    index
        .search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0]["_formatted"]["title"],
                json!("この度、クーポン<em>で</em>無料<em>で</em>頂きました。")
            );
        })
        .await;

    // The two-character Chinese word must be highlighted as a single unit.
    index
        .search(json!({"q": "大卫", "attributesToHighlight": ["title"]}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0]["_formatted"]["title"],
                json!("<em>大卫</em>到了扫罗那里")
            );
        })
        .await;
}

View File

@@ -149,49 +149,6 @@ async fn simple_search() {
.await;
}
/// Regression test for https://github.com/meilisearch/meilisearch/issues/3521:
/// a query mixing quoted phrases (one of them left unterminated) with configured
/// stop words must still succeed and return exactly one hit.
#[actix_rt::test]
async fn phrase_search_with_stop_word() {
    let server = Server::new().await;
    let index = server.index("test");

    // Register the stop words before any document is added.
    let (_, status) = index.update_settings(json!({"stopWords": ["the", "of"]})).await;
    meili_snap::snapshot!(status, @"202 Accepted");

    index.add_documents(DOCUMENTS.clone(), None).await;
    // Task 0 is the settings update; task 1 is the document addition.
    index.wait_task(1).await;

    let query = json!({"q": "how \"to\" train \"the" });
    index
        .search(query, |body, status| {
            assert_eq!(status, 200, "{}", body);
            let hits = body["hits"].as_array().unwrap();
            assert_eq!(hits.len(), 1);
        })
        .await;
}
/// Indexes three documents written in Latin, Japanese and Hebrew scripts, then checks
/// that searching for the kanji word `東京` returns exactly one hit.
#[cfg(feature = "default")]
#[actix_rt::test]
async fn test_kanji_language_detection() {
    let server = Server::new().await;
    let index = server.index("test");

    index
        .add_documents(
            json!([
                { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
                { "id": 1, "title": "東京のお寿司。" },
                { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
            ]),
            None,
        )
        .await;
    index.wait_task(0).await;

    let query = json!({"q": "東京"});
    index
        .search(query, |body, status| {
            assert_eq!(status, 200, "{}", body);
            let hits = body["hits"].as_array().unwrap();
            assert_eq!(hits.len(), 1);
        })
        .await;
}
#[actix_rt::test]
async fn search_multiple_params() {
let server = Server::new().await;

View File

@@ -246,10 +246,10 @@ async fn search_one_index_doesnt_exist() {
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[1]`: Index `nested` not found.",
"message": "Index `nested` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@@ -267,10 +267,10 @@ async fn search_multiple_indexes_dont_exist() {
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[0]`: Index `test` not found.",
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@@ -302,7 +302,7 @@ async fn search_one_query_error() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[0]`: Invalid facet distribution, this index does not have configured filterable attributes.",
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -334,7 +334,7 @@ async fn search_multiple_query_errors() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[0]`: Invalid facet distribution, this index does not have configured filterable attributes.",
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"

View File

@@ -16,10 +16,10 @@ bimap = { version = "0.6.2", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.0.1"
byteorder = "1.4.3"
charabia = { version = "0.7.1", default-features = false }
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.4.1"
either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"

View File

@@ -59,8 +59,6 @@ pub enum InternalError {
Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted.")]
AbortedIndexation,
#[error("The matching words list contains at least one invalid member.")]
InvalidMatchingWords,
}
#[derive(Error, Debug)]

View File

@@ -5,7 +5,6 @@ mod field_id_word_count_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
mod script_language_codec;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
@@ -20,6 +19,5 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

View File

@@ -1,38 +0,0 @@
use std::borrow::Cow;
use std::str;
use charabia::{Language, Script};
/// Unit type used as a `heed` codec for `(Script, Language)` keys.
pub struct ScriptLanguageCodec;
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
    type DItem = (Script, Language);

    /// Decodes a key laid out as `<script name>`, a single `0` byte, `<language name>`.
    ///
    /// Returns `None` when the separator byte is absent or when either name is not
    /// valid UTF-8.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let separator = bytes.iter().position(|&byte| byte == 0)?;

        let script = Script::from_name(str::from_utf8(&bytes[..separator]).ok()?);
        // `separator + 1` steps over the `\0` byte sitting between the two names.
        let language = Language::from_name(str::from_utf8(&bytes[separator + 1..]).ok()?);

        Some((script, language))
    }
}
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
    type EItem = (Script, Language);

    /// Encodes the pair as `<script name>`, a single `0` byte, `<language name>`.
    ///
    /// Always returns `Some` with an owned buffer.
    fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
        // The lone zero byte is the separator `bytes_decode` looks for.
        let encoded = [script.name().as_bytes(), &[0u8][..], lan.name().as_bytes()].concat();
        Some(Cow::Owned(encoded))
    }
}

View File

@@ -4,7 +4,6 @@ use std::fs::File;
use std::mem::size_of;
use std::path::Path;
use charabia::{Language, Script};
use heed::flags::Flags;
use heed::types::*;
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
@@ -19,7 +18,7 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
use crate::heed_codec::StrRefCodec;
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@@ -84,7 +83,6 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
}
#[derive(Clone)]
@@ -124,9 +122,6 @@ pub struct Index {
/// Maps the position of a word prefix with all the docids where this prefix appears.
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
/// Maps the script and language with all the docids that corresponds to it.
pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field exists
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
@@ -153,7 +148,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(19);
options.max_dbs(18);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@@ -164,7 +159,6 @@ impl Index {
let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
let word_prefix_pair_proximity_docids =
env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
let prefix_word_pair_proximity_docids =
@@ -192,7 +186,6 @@ impl Index {
exact_word_prefix_docids,
docid_word_positions,
word_pair_proximity_docids,
script_language_docids,
word_prefix_pair_proximity_docids,
prefix_word_pair_proximity_docids,
word_position_docids,
@@ -1194,38 +1187,6 @@ impl Index {
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
}
/* script language docids */
/// Returns the ids of the documents stored under the given `(Script, Language)` key,
/// with the soft-deleted documents removed, or `None` when the key has no entry.
pub fn script_language_documents_ids(
    &self,
    rtxn: &RoTxn,
    key: &(Script, Language),
) -> heed::Result<Option<RoaringBitmap>> {
    let deleted = self.soft_deleted_documents_ids(rtxn)?;
    let live_ids = self.script_language_docids.get(rtxn, key)?.map(|stored| stored - deleted);
    Ok(live_ids)
}
/// Groups, per script, every language that still has at least one document which is
/// not soft-deleted.
///
/// Iterates over the whole `script_language_docids` database; any `heed` error raised
/// while reading the soft-deleted ids or an entry is returned as is.
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
    let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;

    let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
    for entry in self.script_language_docids.iter(rtxn)? {
        let ((script, language), docids) = entry?;

        // keep only Languages that contains at least 1 document:
        // if every docid is soft-deleted, the language is effectively empty.
        if !soft_deleted_documents.is_superset(&docids) {
            // Single lookup: creates the per-script list on first use, then appends.
            script_language.entry(script).or_default().push(language);
        }
    }

    Ok(script_language)
}
}
#[cfg(test)]

View File

@@ -1,6 +1,5 @@
use std::mem::take;
use heed::BytesDecode;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
@@ -8,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
@@ -197,38 +196,6 @@ fn facet_ordered_iterative<'t>(
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
}
/// Pulls the first item out of a facet-sort iterator and decodes its facet value
/// with `OrderedF64Codec`.
///
/// Returns `Ok(None)` when the iterator is empty, and propagates the error when the
/// first item is an `Err`.
fn facet_extreme_value<'t>(
    mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
    match extreme_it.next() {
        None => Ok(None),
        Some(first) => {
            // Only the raw facet value is needed; the matching docids are ignored.
            let (_docids, value_bytes) = first?;
            Ok(OrderedF64Codec::bytes_decode(value_bytes))
        }
    }
}
/// Returns the first numeric facet value yielded for `field_id` when the given
/// `candidates` are walked in ascending facet order, or `None` when nothing is yielded.
pub fn facet_min_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let ascending = ascending_facet_sort(
        rtxn,
        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
        field_id,
        candidates,
    )?;
    facet_extreme_value(ascending)
}
/// Returns the first numeric facet value yielded for `field_id` when the given
/// `candidates` are walked in descending facet order, or `None` when nothing is yielded.
pub fn facet_max_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let descending = descending_facet_sort(
        rtxn,
        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
        field_id,
        candidates,
    )?;
    facet_extreme_value(descending)
}
fn facet_ordered_set_based<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@@ -236,24 +203,23 @@ fn facet_ordered_set_based<'t>(
is_ascending: bool,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
let number_db =
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let string_db =
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
let (number_iter, string_iter) = if is_ascending {
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates.clone(),
)?;
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
} else {
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
)?;
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
Ok(Box::new(number_iter.chain(string_iter)))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.

View File

@@ -21,7 +21,6 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc;
pub use asc_desc::{facet_max_value, facet_min_value};
mod attribute;
mod exactness;
pub mod r#final;

View File

@@ -278,65 +278,6 @@ impl<'a> FacetDistribution<'a> {
}
}
/// Computes, for every requested facet, the `(min, max)` pair of its numeric values
/// among the candidate documents.
///
/// - Returns an empty map when no candidates were set.
/// - Returns `UserError::InvalidFacetsDistribution` when a requested facet is not
///   filterable.
/// - A facet for which no minimum or no maximum can be found is left out of the map.
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
    let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
    let filterable_fields = self.index.filterable_fields(self.rtxn)?;

    let candidates = match self.candidates.clone() {
        Some(candidates) => candidates,
        None => return Ok(Default::default()),
    };

    // Without an explicit facet list, every filterable field is considered.
    let fields = match &self.facets {
        None => filterable_fields,
        Some(facets) => {
            let invalid_fields: HashSet<_> = facets
                .iter()
                .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                .collect();
            if invalid_fields.is_empty() {
                facets.clone()
            } else {
                return Err(UserError::InvalidFacetsDistribution {
                    invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
                    valid_facets_name: filterable_fields.into_iter().collect(),
                }
                .into());
            }
        }
    };

    let mut distribution = BTreeMap::new();
    for (fid, name) in fields_ids_map.iter() {
        if !crate::is_faceted(name, &fields) {
            continue;
        }

        let lowest = match crate::search::criteria::facet_min_value(
            self.index,
            self.rtxn,
            fid,
            candidates.clone(),
        )? {
            Some(value) => value,
            None => continue,
        };
        let highest = match crate::search::criteria::facet_max_value(
            self.index,
            self.rtxn,
            fid,
            candidates.clone(),
        )? {
            Some(value) => value,
            None => continue,
        };

        distribution.insert(name.to_string(), (lowest, highest));
    }

    Ok(distribution)
}
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
@@ -596,216 +537,4 @@ mod tests {
milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
}
/// `compute_stats` on a filterable field holding one number per document.
///
/// 1000 documents are indexed with `colour` going from 0 to 999. The stats are empty
/// when no candidates are given, and otherwise report the min/max `colour` among the
/// candidates.
#[test]
fn facet_stats() {
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    // A range is already an iterator, so it can be collected directly.
    let facet_values = (0..1000).collect::<Vec<_>>();

    let documents: Vec<_> = (0..1000)
        .map(|i| {
            serde_json::json!({
                "colour": facet_values[i % 1000],
            })
            .as_object()
            .unwrap()
            .clone()
        })
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No candidates: nothing to compute.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..1000).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((217..777).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
/// `compute_stats` on a filterable field holding an array of two numbers per document.
///
/// Each of the 1000 documents stores `[v, v + 1000]` with `v` going from 0 to 999, so
/// the reported maximum comes from the second array element.
#[test]
fn facet_stats_array() {
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    // A range is already an iterator, so it can be collected directly.
    let facet_values = (0..1000).collect::<Vec<_>>();

    let documents: Vec<_> = (0..1000)
        .map(|i| {
            serde_json::json!({
                "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
            })
            .as_object()
            .unwrap()
            .clone()
        })
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No candidates: nothing to compute.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..1000).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((217..777).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
}
/// `compute_stats` on a filterable field holding an array mixing a number and a string.
///
/// Each of the 1000 documents stores `[v, "<v + 1000>"]`; the expected stats show that
/// only the numeric element contributes to the min/max.
#[test]
fn facet_stats_mixed_array() {
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    // A range is already an iterator, so it can be collected directly.
    let facet_values = (0..1000).collect::<Vec<_>>();

    let documents: Vec<_> = (0..1000)
        .map(|i| {
            serde_json::json!({
                "colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
            })
            .as_object()
            .unwrap()
            .clone()
        })
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No candidates: nothing to compute.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..1000).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((217..777).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
/// `compute_stats` when documents alternate between a numeric array and a plain string.
///
/// Even-numbered documents store `[v, v + 1000]`, odd-numbered ones store the string
/// `"<v + 10000>"`; the expected stats show that only the even documents' numbers
/// contribute to the min/max.
#[test]
fn facet_mixed_values() {
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    // A range is already an iterator, so it can be collected directly.
    let facet_values = (0..1000).collect::<Vec<_>>();

    let documents: Vec<_> = (0..1000)
        .map(|i| {
            let document = if i % 2 == 0 {
                serde_json::json!({
                    "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
                })
            } else {
                serde_json::json!({
                    "colour": format!("{}", facet_values[i % 1000] + 10000),
                })
            };
            document.as_object().unwrap().clone()
        })
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No candidates: nothing to compute.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..1000).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);

    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((217..777).collect())
        .compute_stats()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
}
}

View File

@@ -34,20 +34,15 @@ pub fn ascending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
Ok(itertools::Either::Left(AscendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter)],
}))
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
} else {
Ok(itertools::Either::Right(std::iter::empty()))
Ok(Box::new(std::iter::empty()))
}
}
@@ -65,7 +60,7 @@ struct AscendingFacetSort<'t, 'e> {
}
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
type Item = Result<(RoaringBitmap, &'t [u8])>;
type Item = Result<RoaringBitmap>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -95,8 +90,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
*documents_ids -= &bitmap;
if level == 0 {
// Since the level is 0, the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
return Some(Ok(bitmap));
}
let starting_key_below =
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
@@ -136,7 +130,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -158,7 +152,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -167,7 +161,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -189,7 +183,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -198,7 +192,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -220,7 +214,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}

View File

@@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {
Ok(Box::new(DescendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter, Bound::Included(last_bound))],
}))
} else {
Ok(itertools::Either::Right(std::iter::empty()))
Ok(Box::new(std::iter::empty()))
}
}
@@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> {
}
impl<'t> Iterator for DescendingFacetSort<'t> {
type Item = Result<(RoaringBitmap, &'t [u8])>;
type Item = Result<RoaringBitmap>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -77,8 +77,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*documents_ids -= &bitmap;
if level == 0 {
// Since we're at the level 0 the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
return Some(Ok(bitmap));
}
let starting_key_below =
FacetGroupKey { field_id, level: level - 1, left_bound };
@@ -147,7 +146,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -170,7 +169,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -180,7 +179,7 @@ mod tests {
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -201,7 +200,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -210,7 +209,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -232,7 +231,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
let docids = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}

View File

@@ -7,7 +7,6 @@ use std::rc::Rc;
use charabia::Token;
use levenshtein_automata::{Distance, DFA};
use crate::error::InternalError;
use crate::search::build_dfa;
use crate::MAX_WORD_LENGTH;
@@ -32,19 +31,12 @@ impl fmt::Debug for MatchingWords {
}
impl MatchingWords {
pub fn new(
mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>,
) -> crate::Result<Self> {
// if one of the matching_words vec doesn't contain a word.
if matching_words.iter().any(|(mw, _)| mw.is_empty()) {
return Err(InternalError::InvalidMatchingWords.into());
}
pub fn new(mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>) -> Self {
// Sort words by length in descending order, prioritizing the longest matches,
// in order to highlight the longest part of the matched word.
matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len())));
Ok(Self { inner: matching_words })
Self { inner: matching_words }
}
/// Returns an iterator over terms that match or partially match the given token.
@@ -368,7 +360,7 @@ mod tests {
(vec![all[2].clone()], vec![2]),
];
let matching_words = MatchingWords::new(matching_words).unwrap();
let matching_words = MatchingWords::new(matching_words);
assert_eq!(
matching_words

View File

@@ -513,7 +513,7 @@ mod tests {
(vec![all[2].clone()], vec![2]),
];
MatchingWords::new(matching_words).unwrap()
MatchingWords::new(matching_words)
}
impl MatcherBuilder<'_, Vec<u8>> {
@@ -600,7 +600,7 @@ mod tests {
];
let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])];
let matching_words = MatchingWords::new(matching_words).unwrap();
let matching_words = MatchingWords::new(matching_words);
let builder = MatcherBuilder::from_matching_words(matching_words);
@@ -847,7 +847,7 @@ mod tests {
(vec![all[4].clone()], vec![2]),
];
let matching_words = MatchingWords::new(matching_words).unwrap();
let matching_words = MatchingWords::new(matching_words);
let mut builder = MatcherBuilder::from_matching_words(matching_words);
builder.highlight_prefix("_".to_string());

View File

@@ -152,11 +152,6 @@ impl<'a> Search<'a> {
tokbuilder.stop_words(stop_words);
}
let script_lang_map = self.index.script_language(self.rtxn)?;
if !script_lang_map.is_empty() {
tokbuilder.allow_list(&script_lang_map);
}
let tokenizer = tokbuilder.build();
let tokens = tokenizer.tokenize(query);
builder
@@ -451,28 +446,6 @@ mod test {
use super::*;
use crate::index::tests::TempIndex;
#[cfg(feature = "default")]
#[test]
fn test_kanji_language_detection() {
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 1, "title": "東京のお寿司。" },
{ "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
]))
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
}
#[test]
fn test_is_authorized_typos() {
let index = TempIndex::new();

View File

@@ -747,7 +747,7 @@ fn create_matching_words(
let mut matching_word_cache = MatchingWordCache::default();
let mut matching_words = Vec::new();
ngrams(ctx, authorize_typos, query, &mut matching_words, &mut matching_word_cache, 0)?;
MatchingWords::new(matching_words)
Ok(MatchingWords::new(matching_words))
}
pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;
@@ -825,13 +825,9 @@ where
quoted = !quoted;
}
// if there is a quote or a hard separator we close the phrase.
if quote_count > 0 || separator_kind == SeparatorKind::Hard {
let phrase = mem::take(&mut phrase);
// if the phrase only contains stop words, we don't keep it in the query.
if phrase.iter().any(|w| w.is_some()) {
primitive_query.push(PrimitiveQueryPart::Phrase(phrase));
}
if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard)
{
primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
}
}
_ => (),
@@ -839,7 +835,7 @@ where
}
// If a quote is never closed, we consider the rest of the query as a phrase.
if phrase.iter().any(|w| w.is_some()) {
if !phrase.is_empty() {
primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
}

View File

@@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@@ -427,26 +427,8 @@ pub fn snap_settings(index: &Index) -> String {
snap
}
pub fn snap_documents(index: &Index) -> String {
let mut snap = String::new();
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}
snap
}
#[macro_export]
macro_rules! full_snap_of_db {
($index:ident, documents) => {{
$crate::snapshot_tests::snap_documents(&$index)
}};
($index:ident, settings) => {{
$crate::snapshot_tests::snap_settings(&$index)
}};

View File

@@ -30,7 +30,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
word_position_docids,
field_id_word_count_docids,
word_prefix_position_docids,
script_language_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_exists_docids,
@@ -83,7 +82,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
word_position_docids.clear(self.wtxn)?;
field_id_word_count_docids.clear(self.wtxn)?;
word_prefix_position_docids.clear(self.wtxn)?;
script_language_docids.clear(self.wtxn)?;
facet_id_f64_docids.clear(self.wtxn)?;
facet_id_exists_docids.clear(self.wtxn)?;
facet_id_string_docids.clear(self.wtxn)?;

View File

@@ -243,7 +243,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
facet_id_string_docids: _,
field_id_docid_facet_f64s: _,
field_id_docid_facet_strings: _,
script_language_docids,
facet_id_exists_docids,
documents,
} = self.index;
@@ -500,22 +499,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
.execute(self.wtxn)?;
}
// Remove the documents ids from the script language database.
let mut iter = script_language_docids.iter_mut(self.wtxn)?;
while let Some((key, mut docids)) = iter.next().transpose()? {
let previous_len = docids.len();
docids -= &self.to_delete_docids;
if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len {
let key = key.to_owned();
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&key, &docids)? };
}
}
drop(iter);
// We delete the documents ids that are under the facet field id values.
remove_docids_from_facet_id_exists_docids(
self.wtxn,
@@ -1183,52 +1166,4 @@ mod tests {
stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
fn stored_detected_script_and_language_should_not_return_deleted_documents_(
deletion_strategy: DeletionStrategy,
) {
use charabia::{Language, Script};
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
{ "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
{ "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
{ "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
{ "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
]))
.unwrap();
let key_cmn = (Script::Cj, Language::Cmn);
let cj_cmn_docs =
index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
let mut expected_cj_cmn_docids = RoaringBitmap::new();
expected_cj_cmn_docids.push(1);
expected_cj_cmn_docids.push(5);
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
delete_documents(&mut wtxn, &index, &["1"], deletion_strategy);
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
let cj_cmn_docs =
index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
let mut expected_cj_cmn_docids = RoaringBitmap::new();
expected_cj_cmn_docids.push(5);
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test]
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
stored_detected_script_and_language_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysHard,
);
stored_detected_script_and_language_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysSoft,
);
}
}

View File

@@ -395,7 +395,6 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
(Some(_), None) => Ok(Err(MissingLongitude { document_id: debug_id() })),
(None, None) => Ok(Err(MissingLatitudeAndLongitude { document_id: debug_id() })),
},
Value::Null => Ok(Ok(())),
value => Ok(Err(NotAnObject { document_id: debug_id(), value })),
}
}

View File

@@ -1,9 +1,9 @@
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::convert::TryInto;
use std::fs::File;
use std::{io, mem, str};
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, TokenizerBuilder};
use charabia::{SeparatorKind, Token, TokenKind, TokenizerBuilder};
use roaring::RoaringBitmap;
use serde_json::Value;
@@ -13,8 +13,6 @@ use crate::{
absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
/// Extracts the word and positions where this word appears and
/// prefixes it by the document id.
///
@@ -27,13 +25,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
searchable_fields: &Option<HashSet<FieldId>>,
stop_words: Option<&fst::Set<&[u8]>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
let mut documents_ids = RoaringBitmap::new();
let mut script_language_pair = HashMap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
concat_u32s_array,
@@ -73,13 +70,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
for (index, token) in tokens {
if let Some(language) = token.language {
let script = token.script;
let entry = script_language_pair
.entry((script, language))
.or_insert_with(RoaringBitmap::new);
entry.push(document_id);
}
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
key_buffer.truncate(mem::size_of::<u32>());
@@ -98,8 +88,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
}
}
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_pair))
sorter_into_reader(docid_word_positions_sorter, indexer).map(|reader| (documents_ids, reader))
}
/// Transform a JSON value into a string that can be indexed.

View File

@@ -59,7 +59,6 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)

View File

@@ -257,14 +257,13 @@ fn send_and_extract_flattened_documents_data(
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {
let (documents_ids, docid_word_positions_chunk, script_language_pair) =
extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer,
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
)?;
let (documents_ids, docid_word_positions_chunk) = extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer,
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
)?;
// send documents_ids to DB writer
let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
@@ -275,9 +274,6 @@ fn send_and_extract_flattened_documents_data(
let _ = lmdb_writer_sx
.send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
let _ =
lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
Ok(docid_word_positions_chunk)
},
|| {

View File

@@ -6,7 +6,6 @@ use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
@@ -58,6 +57,21 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
.iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current);
let mut buffer = Vec::new();
merge_two_obkvs(first, second, &mut buffer);
Cow::from(buffer)
})
.unwrap())
}
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@@ -74,41 +88,6 @@ pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffe
writer.finish().unwrap();
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],

View File

@@ -13,9 +13,9 @@ pub use grenad_helpers::{
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
serialize_roaring_bitmap, MergeFn,
};
use crate::MAX_WORD_LENGTH;

View File

@@ -79,7 +79,6 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
progress: FP,
should_abort: FA,
added_documents: u64,
deleted_documents: u64,
}
#[derive(Default, Debug, Clone)]
@@ -123,7 +122,6 @@ where
wtxn,
index,
added_documents: 0,
deleted_documents: 0,
})
}
@@ -168,30 +166,6 @@ where
Ok((self, Ok(indexed_documents)))
}
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
// Early return when there is no document to delete
if to_delete.is_empty() {
return Ok((self, Ok(0)));
}
let deleted_documents = self
.transform
.as_mut()
.expect("Invalid document deletion state")
.remove_documents(to_delete, self.wtxn, &self.should_abort)?
as u64;
self.deleted_documents += deleted_documents;
Ok((self, Ok(deleted_documents)))
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
@@ -1905,355 +1879,4 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
use charabia::{Language, Script};
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
{ "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
{ "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
{ "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
{ "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let key_jpn = (Script::Cj, Language::Jpn);
let key_cmn = (Script::Cj, Language::Cmn);
let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
let expected_cj_jpn_docids = [3].iter().collect();
assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
let expected_cj_cmn_docids = [1, 5].iter().collect();
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":3,"name":"jean","age":25}
"###);
}
#[test]
fn add_update_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 5,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn add_document_and_in_another_transform_update_and_delete_documents() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 3,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":2,"doggo":{"name":"bob","age":20}}
{"id":3,"name":"jean","age":25}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn delete_document_and_then_add_documents_in_the_same_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":2,"doggo":{"name":"jean","age":20}}
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn delete_the_same_document_multiple_time() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn add_document_and_in_another_transform_delete_the_document_then_add_it_again() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"catto":"jorts"}
"###);
}
}

View File

@@ -12,9 +12,7 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@@ -52,12 +50,8 @@ pub struct Transform<'a, 'i> {
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
// Both grenad sorters follow the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
// To increase the cache locality and decrease the heap usage we use compact smartstring.
@@ -65,14 +59,6 @@ pub struct Transform<'a, 'i> {
documents_count: usize,
}
/// This enum is specific to the grenad sorter stored in the transform.
/// It's used as the first byte of the grenads and tells you if the document id was an addition or a deletion.
#[repr(u8)]
pub enum Operation {
Addition,
Deletion,
}
/// Create a mapping between the field ids found in the document batch and the one that were
/// already present in the index.
///
@@ -108,7 +94,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
};
// We initialize the sorter with the user indexing settings.
@@ -165,7 +151,9 @@ impl<'a, 'i> Transform<'a, 'i> {
FA: Fn() -> bool + Sync,
{
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let primary_key = cursor.primary_key().to_string();
@@ -173,7 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
let mut documents_count = 0;
let mut docid_buffer: Vec<u8> = Vec::new();
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@@ -225,13 +212,10 @@ impl<'a, 'i> Transform<'a, 'i> {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later.
// It'll be deleted later. We keep its original docid to insert it in the grenad.
if let Some(docid) = external_documents_ids.get(entry.key()) {
// If it was already in the list of replaced documents it means it was deleted
// by the remove_documents method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
self.replaced_documents_ids.insert(docid);
original_docid = Some(docid);
}
let docid = self
.available_documents_ids
@@ -264,46 +248,26 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), base_obkv)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
}
}
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
None => {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
documents_count += 1;
@@ -329,73 +293,6 @@ impl<'a, 'i> Transform<'a, 'i> {
Ok(documents_count)
}
/// The counterpart of `read_documents`: it removes documents either from the transform or from the database.
/// It can be called before, after or in between two calls of `read_documents`.
///
/// It needs to update all the internal data structures of the transform.
/// - If the document is coming from the database -> it's marked as a to_delete document
/// - If the document to remove was inserted by the `read_documents` method before AND was present in the db,
/// it's marked as `to_delete` + added into the grenad to ensure we don't reinsert it.
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
///
/// Returns the number of documents that were found and marked for removal. Identifiers that
/// are known neither to the transform nor to the database are not counted, and duplicated
/// identifiers in `to_remove` are only counted once.
///
/// # Errors
///
/// Returns `InternalError::AbortedIndexation` as soon as `should_abort` returns `true`, and
/// forwards any error raised while fetching the external documents ids or inserting into
/// the sorters.
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
wtxn: &mut heed::RwTxn,
should_abort: FA,
) -> Result<usize>
where
FA: Fn() -> bool + Sync,
{
// There may be duplicates in the documents to remove: sorting then deduplicating
// guarantees each external id is processed (and counted) at most once.
to_remove.sort_unstable();
to_remove.dedup();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
for to_remove in to_remove {
// The abort callback is polled once per document to remove.
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// If the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
// A value made of the single `Operation::Deletion` byte is pushed under the docid
// in both sorters.
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// We must NOT update the list of replaced_documents_ids.
// Either:
// 1. It's already in it and there is nothing to do
// 2. It wasn't in it because the document was created by a previous batch and since
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
}
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// If the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted.
continue;
}
}
};
documents_deleted += 1;
}
Ok(documents_deleted)
}
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@@ -590,11 +487,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
documents_count += 1;
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
@@ -626,18 +518,9 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
tempfile::tempfile()?,
);
// Once we have written all the documents into the final sorter, we write the nested documents
// into this writer.
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
// Once we have written all the documents into the final sorter, we write the documents
// into this writer, extract the file and reset the seek to be able to read it again.
self.flattened_sorter.write_into_stream_writer(&mut writer)?;
let mut flattened_documents = writer.into_inner()?;
flattened_documents.rewind()?;
@@ -818,45 +701,3 @@ impl TransformOutput {
.collect())
}
}
// Unit tests for `merge_obkvs_and_operations`, checking how values prefixed with an
// `Operation` byte are combined depending on the order of additions and deletions.
#[cfg(test)]
mod test {
use super::*;
#[test]
fn merge_obkvs() {
// Build a one-field obkv document, then prefix it with the `Addition` operation byte.
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
// A single addition is returned unchanged.
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
// A deletion placed before an addition: the addition is the result.
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, doc_0);
// An addition placed before a deletion: only the deletion marker remains.
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, [Operation::Deletion as u8]);
// Addition, deletion, then addition: the first addition is discarded and the result
// is exactly the last addition.
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, doc_0);
}
}

View File

@@ -1,10 +1,8 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::File;
use std::io;
use charabia::{Language, Script};
use grenad::MergerBuilder;
use heed::types::ByteSlice;
use heed::{BytesDecode, RwTxn};
@@ -40,7 +38,6 @@ pub(crate) enum TypedChunk {
FieldIdFacetNumberDocids(grenad::Reader<File>),
FieldIdFacetExistsDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}
/// Write typed chunk in the corresponding LMDB database of the provided index.
@@ -213,24 +210,6 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_geo_rtree(wtxn, &rtree)?;
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
}
TypedChunk::ScriptLanguageDocids(hash_pair) => {
let mut buffer = Vec::new();
for (key, value) in hash_pair {
buffer.clear();
let final_value = match index.script_language_docids.get(wtxn, &key)? {
Some(db_values) => {
let mut db_value_buffer = Vec::new();
serialize_roaring_bitmap(&db_values, &mut db_value_buffer)?;
let mut new_value_buffer = Vec::new();
serialize_roaring_bitmap(&value, &mut new_value_buffer)?;
merge_roaring_bitmaps(&new_value_buffer, &db_value_buffer, &mut buffer)?;
RoaringBitmap::deserialize_from(&buffer[..])?
}
None => value,
};
index.script_language_docids.put(wtxn, &key, &final_value)?;
}
}
}
Ok((RoaringBitmap::new(), is_merged_database))

Some files were not shown because too many files have changed in this diff Show More