Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-07-22 06:11:01 +00:00

Compare commits: dockerfile ... v1.1.0-rc. (96 commits)
Commits in this comparison (only the SHA1 column was captured by the mirror; author and date are missing):

c0ede6d152
577e7126f9
3d1046369c
4f1ccbc495
37489fd495
d9e19c89c5
18bf740ee2
0202ff8ab4
fbe4ab158e
92318ca573
6ca7a109b9
d4d4702f1b
2648bbca25
562c86ea01
7ae10abb6b
dc533584c6
442c1e36de
66b5e4b548
89ac1015f3
ca25904c26
8a1b1a95f3
8d47d2d018
5082cd5e67
750a2b6842
bc7d4112d9
88a18677d0
68e30214ca
b985b96e4e
71e7900c67
431782f3ee
3db613ff77
5822764be9
c63294f331
a529bf160c
f1119f2dc2
1db7d5d851
80b060f920
fdf043580c
f62703cd67
76f82c880d
6eeba3a8ab
28d6a4466d
1ba2fae3ae
28d6ab78de
3ba5dfb6ec
a23fbf6c7b
596a98f7c6
14c4a222da
690bb2e5cc
d0f2c9c72e
42577403d8
c8c5944094
4b65851793
10d4a1a9af
ad35edfa32
033417e9cc
ac5a1e4c4b
3eb9a08b5c
900bae3d9d
28b7d73d4a
6841f167b4
c88b6f331f
09a94e0db3
39407885c2
a3e41ba33e
ce807d760b
bbecab8948
5cff435bf6
8aa808d51b
23f4e82b53
119e6d8811
cb8d5f2d4b
eb28d4c525
9ac981d025
74859ecd61
8ae441a4db
042d86cbb3
91048d209d
62358bd31c
0bc1a18f52
643d99e0f9
064158e4e2
77d32d0ee8
f4569b04ad
2922c5c899
7681be5367
50bc156257
d8207356f4
2d58b28f43
fd60a39f1c
369c05732e
34d04f3d3f
a27f329e3a
b216ddba63
d97fb6117e
c45d1e3610
.github/uffizzi/Dockerfile (vendored) — 31 changed lines

@@ -1,24 +1,3 @@
-# Compile
-FROM rust:alpine3.16 AS compiler
-
-RUN apk add -q --update-cache --no-cache build-base openssl-dev
-
-WORKDIR /meilisearch
-
-ARG COMMIT_SHA
-ARG COMMIT_DATE
-ARG GIT_TAG
-ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
-ENV RUSTFLAGS="-C target-feature=-crt-static"
-
-COPY . .
-RUN set -eux; \
-    apkArch="$(apk --print-arch)"; \
-    if [ "$apkArch" = "aarch64" ]; then \
-        export JEMALLOC_SYS_WITH_LG_PAGE=16; \
-    fi && \
-    cargo build --release
-
 # Run
 FROM uffizzi/ttyd:alpine
 
@@ -29,19 +8,11 @@ ENV MEILI_NO_ANALYTICS true
 RUN apk update --quiet \
     && apk add -q --no-cache libgcc tini curl
 
-# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
-# to find.
-COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
-# To stay compatible with the older version of the container (pre v0.27.0) we're
-# going to symlink the meilisearch binary in the path to `/meilisearch`
+COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
 RUN ln -s /bin/meilisearch /meilisearch
 
-# This directory should hold all the data related to meilisearch so we're going
-# to move our PWD in there.
-# We don't want to put the meilisearch binary
 WORKDIR /meili_data
 
 
 EXPOSE 7700/tcp
 
 ENTRYPOINT ["tini", "--"]
.github/workflows/uffizzi-build.yml (vendored) — 20 changed lines

@@ -14,6 +14,26 @@ jobs:
       - name: checkout
         uses: actions/checkout@v3
 
+      - run: sudo apt-get install musl-tools
+
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          override: true
+          target: x86_64-unknown-linux-musl
+
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.0
+
+      - name: Run cargo check without any default features
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --target x86_64-unknown-linux-musl --release
+
+      - name: Remove dockerignore so we can use the target folder in our docker build
+        run: rm -f .dockerignore
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
 
.github/workflows/uffizzi-preview-deploy.yml (vendored) — 6 changed lines

@@ -82,7 +82,7 @@ jobs:
     name: Use Remote Workflow to Preview on Uffizzi
     needs:
       - cache-compose-file
-    uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@desc
+    uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
     with:
       # If this workflow was triggered by a PR close event, cache-key will be an empty string
       # and this reusable workflow will delete the preview deployment.
@@ -95,8 +95,8 @@ jobs:
         `meilisearch` command. You should be able to access this instance of meilisearch running in
         the preview from the link Meilisearch Endpoint link given below.
 
-        Web Terminal Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}
-        Meilisearch Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}/meilisearch
+        Web Terminal Endpoint : <uffizzi-url>
+        Meilisearch Endpoint : <uffizzi-url>/meilisearch
     permissions:
       contents: read
       pull-requests: write
.github/workflows/update-cargo-toml-version.yml (vendored) — 12 changed lines

@@ -1,4 +1,4 @@
-name: Update Meilisearch version in all Cargo.toml files
+name: Update Meilisearch version in Cargo.toml
 
 on:
   workflow_dispatch:
@@ -14,7 +14,7 @@ env:
 
 jobs:
   update-version-cargo-toml:
-    name: Update version in Cargo.toml files
+    name: Update version in Cargo.toml
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -25,7 +25,7 @@ jobs:
           override: true
       - name: Install sd
         run: cargo install sd
-      - name: Update Cargo.toml files
+      - name: Update Cargo.toml file
        run: |
          raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
          new_string="version = \"$raw_new_version\""
@@ -35,13 +35,13 @@ jobs:
       - name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
         uses: EndBug/add-and-commit@v9
         with:
-          message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files"
+          message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml"
           new_branch: ${{ env.NEW_BRANCH }}
       - name: Create the PR pointing to ${{ github.ref_name }}
         run: |
           gh pr create \
-            --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
-            --body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
+            --title "Update version for the next release ($NEW_VERSION) in Cargo.toml" \
+            --body '⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.' \
             --label 'skip changelog' \
             --milestone $NEW_VERSION \
             --base $GITHUB_REF_NAME
Cargo.lock (generated) — 197 changed lines

@@ -410,7 +410,7 @@ checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
 
 [[package]]
 name = "benchmarks"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -517,12 +517,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "build_const"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7"
-
 [[package]]
 name = "bumpalo"
 version = "3.11.1"
@@ -659,16 +653,19 @@ dependencies = [
 
 [[package]]
 name = "charabia"
-version = "0.7.0"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b57f9571f611796ea38e5a9c12e5ce37476f70397b032757f8dfe0c7b9bc5637"
+checksum = "1ad3d9667a6b4e03813162c22c4d58235c2dc25d580d60837ce29199038341c9"
 dependencies = [
  "cow-utils",
  "csv",
  "deunicode",
  "fst",
+ "irg-kvariants",
  "jieba-rs",
  "lindera",
+ "lindera-ipadic",
+ "lindera-ko-dic",
  "once_cell",
  "pinyin",
  "serde",
@@ -721,14 +718,9 @@ version = "3.2.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
 dependencies = [
- "atty",
  "bitflags",
- "clap_derive 3.2.18",
  "clap_lex 0.2.4",
  "indexmap",
- "once_cell",
- "strsim",
- "termcolor",
  "textwrap",
 ]
 
@@ -739,7 +731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39"
 dependencies = [
  "bitflags",
- "clap_derive 4.0.21",
+ "clap_derive",
  "clap_lex 0.3.0",
  "is-terminal",
  "once_cell",
@@ -747,19 +739,6 @@ dependencies = [
  "termcolor",
 ]
 
-[[package]]
-name = "clap_derive"
-version = "3.2.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
-dependencies = [
- "heck",
- "proc-macro-error",
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "clap_derive"
 version = "4.0.21"
@@ -873,15 +852,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "crc"
-version = "1.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
-dependencies = [
- "build_const",
-]
-
 [[package]]
 name = "crc32fast"
 version = "1.3.2"
@@ -1180,7 +1150,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1333,6 +1303,19 @@ dependencies = [
  "termcolor",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
+dependencies = [
+ "humantime",
+ "is-terminal",
+ "log",
+ "regex",
+ "termcolor",
+]
+
 [[package]]
 name = "errno"
 version = "0.2.8"
@@ -1388,7 +1371,7 @@ dependencies = [
 
 [[package]]
 name = "file-store"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "faux",
  "tempfile",
@@ -1410,7 +1393,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "insta",
  "nom",
@@ -1430,7 +1413,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1907,7 +1890,7 @@ dependencies = [
 
 [[package]]
 name = "index-scheduler"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1921,6 +1904,7 @@ dependencies = [
  "insta",
  "log",
  "meili-snap",
+ "meilisearch-auth",
  "meilisearch-types",
  "nelson",
  "page_size 0.5.0",
@@ -1986,6 +1970,17 @@ version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146"
 
+[[package]]
+name = "irg-kvariants"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c73214298363629cf9dbfc93b426808865ee3c121029778cb31b1284104fdf78"
+dependencies = [
+ "csv",
+ "once_cell",
+ "serde",
+]
+
 [[package]]
 name = "is-terminal"
 version = "0.4.2"
@@ -2054,7 +2049,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2074,6 +2069,15 @@ dependencies = [
  "simple_asn1",
 ]
 
+[[package]]
+name = "kanaria"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
+dependencies = [
+ "bitflags",
+]
+
 [[package]]
 name = "language-tags"
 version = "0.3.2"
@@ -2143,14 +2147,15 @@ dependencies = [
 
 [[package]]
 name = "lindera"
-version = "0.17.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "082ca91ac4d1557028ace9bfb8cee1500d156a4574dda93cfcdcf4caaebb9bd7"
+checksum = "0f33a20bb9cbf95572b2d2f40d7040c8d8c7ad09ae20e1f6513db6ef2564dfc5"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
  "encoding",
+ "kanaria",
  "lindera-cc-cedict-builder",
  "lindera-core",
  "lindera-dictionary",
@@ -2159,24 +2164,27 @@ dependencies = [
  "lindera-ko-dic",
  "lindera-ko-dic-builder",
  "lindera-unidic-builder",
+ "regex",
  "serde",
  "serde_json",
  "thiserror",
+ "unicode-blocks",
+ "unicode-normalization",
+ "yada",
 ]
 
 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8967615a6d85320ec2755e1435c36165467ba01a79026adc3f86dad1b668df3"
+checksum = "60c3b379251edadbac7a5fdb31e482274e11dae6ab6cc789d0d86cf34369cf49"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
- "clap 3.2.23",
  "csv",
  "encoding",
- "env_logger",
+ "env_logger 0.10.0",
  "glob",
  "lindera-core",
  "lindera-decompress",
@@ -2185,16 +2193,28 @@ dependencies = [
 ]
 
 [[package]]
-name = "lindera-core"
-version = "0.17.0"
+name = "lindera-compress"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e8ed3cea13f73557a4574a179b1518670a3b70bfdad120521313b03cc89380e"
+checksum = "a8d0ea3de5625e2381cac94e518d3b56103fde56bc0dce840fe875c1e871b125"
+dependencies = [
+ "anyhow",
+ "flate2",
+ "lindera-decompress",
+]
+
+[[package]]
+name = "lindera-core"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2281747b98fdd46bcc54ce7fdb6870dad9f67ddb3dc086c47b6704f3e1178cd5"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
  "encoding_rs",
  "log",
+ "once_cell",
  "serde",
  "thiserror",
  "yada",
@@ -2202,20 +2222,20 @@ dependencies = [
 
 [[package]]
 name = "lindera-decompress"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2badb41828f89cfa6452db0a66da77897c0a04478304de26c8b2b36613e08d43"
+checksum = "52101bd454754c506305ab897af5ac2ae41fe91e3272c1ff5c6a02a089dfaefd"
 dependencies = [
  "anyhow",
- "lzma-rs",
+ "flate2",
  "serde",
 ]
 
 [[package]]
 name = "lindera-dictionary"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e219722c9f56b920c231210e7c25d8b5d35b508e7a2fd69d368916c4b1c926f6"
+checksum = "af1c6668848f1d30d216c99093a3ed3fe125c105fa12a4aeed5a1861dc01dd52"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2225,15 +2245,16 @@ dependencies = [
 
 [[package]]
 name = "lindera-ipadic"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c8e87c8362c724e8188fb7d9b6d184cac15d01369295e9bff7812b630d57e3b"
+checksum = "693098007200fa43fd5cdc9ca8740f371327369672ce812cd87a1f6344971e31"
 dependencies = [
  "bincode",
  "byteorder",
  "encoding",
  "flate2",
  "lindera-core",
+ "lindera-decompress",
  "lindera-ipadic-builder",
  "once_cell",
  "tar",
@@ -2241,19 +2262,19 @@ dependencies = [
 
 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1439e95852e444a116424086dc64d709c90e8af269ff7d2c2c4020f666f8dfab"
+checksum = "7b6b7240d097a8fc37ee8f90ebff02c4db0ba5325ecb0dacb6da3724596798c9"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
- "clap 3.2.23",
  "csv",
  "encoding_rs",
  "encoding_rs_io",
- "env_logger",
+ "env_logger 0.10.0",
  "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
  "log",
@@ -2263,15 +2284,16 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb15f949220da45872d774b7831bb030855ec083435c907499782f8558c8a203"
+checksum = "abd3c5a4addeb61ca66788a3dd1fd51093e6cd8fea1d997042ada5aa60e8cc5e"
 dependencies = [
  "bincode",
  "byteorder",
  "encoding",
  "flate2",
  "lindera-core",
+ "lindera-decompress",
  "lindera-ko-dic-builder",
  "once_cell",
  "tar",
@@ -2279,18 +2301,18 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fde5a7352f4754be4f741e90bf4dff38a12a6572ab3880d0cf688e1166b8d82b"
+checksum = "512bb1393a9281e0b13704319d1343b7931416865852d9d7b7c0178431518326"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
- "clap 3.2.23",
  "csv",
  "encoding",
- "env_logger",
+ "env_logger 0.10.0",
  "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
  "log",
@@ -2299,17 +2321,16 @@ dependencies = [
 
 [[package]]
 name = "lindera-unidic-builder"
-version = "0.17.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1451b2ed8a7184a5f815d84f99d358c1d67297305831453dfdc0eb5d08e22b5"
+checksum = "7f575a27f8ba67c15fe16ebf7d277a0ac04e8c8a0f72670ebc2443da9d41c450"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
- "clap 3.2.23",
  "csv",
  "encoding",
- "env_logger",
+ "env_logger 0.10.0",
  "glob",
  "lindera-core",
  "lindera-decompress",
@@ -2398,16 +2419,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "lzma-rs"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1"
-dependencies = [
- "byteorder",
- "crc",
-]
-
 [[package]]
 name = "manifest-dir-macros"
 version = "0.1.16"
@@ -2434,7 +2445,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "insta",
  "md5",
@@ -2443,7 +2454,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -2466,7 +2477,7 @@ dependencies = [
  "deserr",
  "dump",
  "either",
- "env_logger",
+ "env_logger 0.9.3",
  "file-store",
  "flate2",
  "fst",
@@ -2531,7 +2542,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "base64 0.13.1",
  "enum-iterator",
@@ -2550,7 +2561,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -2604,7 +2615,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "big_s",
  "bimap",
@@ -2958,7 +2969,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.0.0"
+version = "1.1.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4011,6 +4022,12 @@ version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
 
+[[package]]
+name = "unicode-blocks"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9de2be6bad6f56ce8373d377e611cbb2265de3a656138065609ce82e217aad70"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.6"
(workspace manifest — file header not captured by the mirror)

@@ -17,7 +17,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.0.0"
+version = "1.1.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
Dockerfile — 16 changed lines

@@ -1,5 +1,7 @@
 # Compile
-FROM rust:bullseye AS compiler
+FROM rust:alpine3.16 AS compiler
 
+RUN apk add -q --update-cache --no-cache build-base openssl-dev
+
 WORKDIR /meilisearch
 
@@ -11,22 +13,20 @@ ENV RUSTFLAGS="-C target-feature=-crt-static"
 
 COPY . .
 RUN set -eux; \
-    arch="$(dpkg --print-architecture)"; \
-    if [ "$arch" = "arm64" ]; then \
+    apkArch="$(apk --print-arch)"; \
+    if [ "$apkArch" = "aarch64" ]; then \
         export JEMALLOC_SYS_WITH_LG_PAGE=16; \
     fi && \
     cargo build --release
 
 # Run
-FROM debian:11.6
+FROM alpine:3.16
 
 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV MEILI_SERVER_PROVIDER docker
 
-RUN set -ex; \
-    apt-get update -q; \
-    apt-get install -q -y --no-install-recommends tini; \
-    rm -rf /var/lib/apt/lists/*
+RUN apk update --quiet \
+    && apk add -q --no-cache libgcc tini curl
 
 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
(benchmark source file — file header not captured by the mirror)

@@ -29,7 +29,7 @@ fn bench_formatting(c: &mut criterion::Criterion) {
             (vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
             (vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
         ]
-        ), TokenizerBuilder::default().build()),
+        ).unwrap(), TokenizerBuilder::default().build()),
         },
     ];
 
config.toml — 10 changed lines

@@ -118,3 +118,13 @@ ssl_resumption = false
 ssl_tickets = false
 # Activates SSL tickets.
 # https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
+
+#############################
+### Experimental features ###
+#############################
+
+experimental_enable_metrics = false
+# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
+# Enables the Prometheus metrics on the `GET /metrics` endpoint.
+
+
(file-store library source — file header not captured by the mirror)

@@ -116,10 +116,20 @@ impl FileStore {
 
     /// List the Uuids of the files in the FileStore
    pub fn all_uuids(&self) -> Result<impl Iterator<Item = Result<Uuid>>> {
-        Ok(self.path.read_dir()?.map(|entry| {
-            Ok(Uuid::from_str(
-                entry?.file_name().to_str().ok_or(Error::CouldNotParseFileNameAsUtf8)?,
-            )?)
+        Ok(self.path.read_dir()?.filter_map(|entry| {
+            let file_name = match entry {
+                Ok(entry) => entry.file_name(),
+                Err(e) => return Some(Err(e.into())),
+            };
+            let file_name = match file_name.to_str() {
+                Some(file_name) => file_name,
+                None => return Some(Err(Error::CouldNotParseFileNameAsUtf8)),
+            };
+            if file_name.starts_with('.') {
+                None
+            } else {
+                Some(Uuid::from_str(file_name).map_err(|e| e.into()))
+            }
         }))
     }
 }
@@ -135,3 +145,34 @@ impl File {
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod test {
+    use std::io::Write;
+
+    use tempfile::TempDir;
+
+    use super::*;
+
+    #[test]
+    fn all_uuids() {
+        let dir = TempDir::new().unwrap();
+        let fs = FileStore::new(dir.path()).unwrap();
+        let (uuid, mut file) = fs.new_update().unwrap();
+        file.write_all(b"Hello world").unwrap();
+        file.persist().unwrap();
+        let all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
+        assert_eq!(all_uuids, vec![uuid]);
+
+        let (uuid2, file) = fs.new_update().unwrap();
+        let all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
+        assert_eq!(all_uuids, vec![uuid]);
+
+        file.persist().unwrap();
+        let mut all_uuids = fs.all_uuids().unwrap().collect::<Result<Vec<_>>>().unwrap();
+        all_uuids.sort();
+        let mut expected = vec![uuid, uuid2];
+        expected.sort();
+        assert_eq!(all_uuids, expected);
+    }
+}
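
The rewritten `all_uuids` above surfaces each entry's I/O error instead of aborting the whole listing, and silently skips hidden files. Below is a minimal, self-contained sketch of that directory-listing pattern, assuming only std and the `uuid` crate; the function name and the string error type are illustrative and are not the crate's real API.

use std::io;
use std::path::Path;
use std::str::FromStr;

use uuid::Uuid;

// List UUID-named files in `dir`, reporting per-entry errors and skipping dotfiles.
fn list_uuids(dir: &Path) -> io::Result<Vec<Result<Uuid, String>>> {
    let mut out = Vec::new();
    for entry in dir.read_dir()? {
        let entry = match entry {
            Ok(entry) => entry,
            Err(e) => {
                // An unreadable entry does not abort the whole listing.
                out.push(Err(e.to_string()));
                continue;
            }
        };
        let file_name = entry.file_name();
        let Some(file_name) = file_name.to_str() else {
            out.push(Err("file name is not valid UTF-8".to_string()));
            continue;
        };
        // Hidden files (names starting with '.') are not update files: skip them.
        if file_name.starts_with('.') {
            continue;
        }
        out.push(Uuid::from_str(file_name).map_err(|e| e.to_string()));
    }
    Ok(out)
}

fn main() -> io::Result<()> {
    for uuid in list_uuids(Path::new("."))? {
        println!("{uuid:?}");
    }
    Ok(())
}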
(index-scheduler manifest — file header not captured by the mirror)

@@ -19,6 +19,7 @@ dump = { path = "../dump" }
 enum-iterator = "1.1.3"
 file-store = { path = "../file-store" }
 log = "0.4.14"
+meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
 roaring = { version = "0.10.0", features = ["serde"] }
(index-scheduler source — file header not captured by the mirror)

@@ -788,15 +788,15 @@ impl IndexScheduler {
                 dump_tasks.flush()?;
 
                 // 3. Dump the indexes
-                for (uid, index) in self.index_mapper.indexes(&rtxn)? {
+                self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
                     let rtxn = index.read_txn()?;
                     let metadata = IndexMetadata {
-                        uid: uid.clone(),
+                        uid: uid.to_owned(),
                         primary_key: index.primary_key(&rtxn)?.map(String::from),
                         created_at: index.created_at(&rtxn)?,
                         updated_at: index.updated_at(&rtxn)?,
                     };
-                    let mut index_dumper = dump.create_index(&uid, &metadata)?;
+                    let mut index_dumper = dump.create_index(uid, &metadata)?;
 
                     let fields_ids_map = index.fields_ids_map(&rtxn)?;
                     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
@@ -809,9 +809,10 @@ impl IndexScheduler {
                     }
 
                     // 3.2. Dump the settings
-                    let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
+                    let settings = meilisearch_types::settings::settings(index, &rtxn)?;
                     index_dumper.settings(&settings)?;
-                }
+                    Ok(())
+                })?;
 
                 let dump_uid = started_at.format(format_description!(
                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
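
The hunk above swaps a `for` loop over `indexes(&rtxn)?` for a fallible visitor callback (`try_for_each_index`). Below is a minimal, self-contained sketch of that callback pattern using plain std types; the names and the motivation (visiting items without materializing them all up front) are assumptions for illustration, not taken from the source.

// Visit every item with a fallible callback; the first error stops the walk.
fn try_for_each_item<E, F>(items: &[(String, u32)], mut f: F) -> Result<(), E>
where
    F: FnMut(&str, &u32) -> Result<(), E>,
{
    for (uid, item) in items {
        f(uid.as_str(), item)?;
    }
    Ok(())
}

fn main() {
    let items = vec![("movies".to_string(), 1), ("books".to_string(), 2)];
    let mut seen = Vec::new();
    let visited: Result<(), String> = try_for_each_item(&items, |uid, item| {
        seen.push(format!("{uid}: {item}"));
        Ok(())
    });
    visited.unwrap();
    assert_eq!(seen, ["movies: 1", "books: 2"]);
}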
index-scheduler/src/index_mapper/index_map.rs — new file, 370 lines (every line below is an addition)

@@ -0,0 +1,370 @@
/// the map size to use when we don't succeed in reading it in indexes.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB

use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;

use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;

use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
use crate::lru::{InsertionOutcome, LruMap};
use crate::{clamp_to_page_size, Result};

/// Keep an internally consistent view of the open indexes in memory.
///
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
/// Indexes that are being closed (for resizing or due to cache eviction) or deleted cannot be evicted from the cache and
/// are stored separately.
///
/// This view provides operations to change the state of the index as it is known in memory:
/// open an index (making it available for queries), close an index (specifying the new size it should be opened with),
/// delete an index.
///
/// External consistency with the other bits of data of an index is provided by the `IndexMapper` parent structure.
pub struct IndexMap {
    /// A LRU map of indexes that are in the open state and available for queries.
    available: LruMap<Uuid, Index>,
    /// A map of indexes that are not available for queries, either because they are being deleted
    /// or because they are being closed.
    ///
    /// If they are being deleted, the UUID points to `None`.
    unavailable: BTreeMap<Uuid, Option<ClosingIndex>>,

    /// A monotonically increasing generation number, used to differentiate between multiple successive index closing requests.
    ///
    /// Because multiple readers could be waiting on an index to close, the following could theoretically happen:
    ///
    /// 1. Multiple readers wait for the index closing to occur.
    /// 2. One of them "wins the race", takes the lock and then removes the index that finished closing from the map.
    /// 3. The index is reopened, but must be closed again (such as being resized again).
    /// 4. One reader that "lost the race" in (2) wakes up and tries to take the lock and remove the index from the map.
    ///
    /// In that situation, the index may or may not have finished closing. The `generation` field allows to remember which
    /// closing request was made, so the reader that "lost the race" has the old generation and will need to wait again for the index
    /// to close.
    generation: usize,
}

#[derive(Clone)]
pub struct ClosingIndex {
    uuid: Uuid,
    closing_event: EnvClosingEvent,
    map_size: usize,
    generation: usize,
}

impl ClosingIndex {
    /// Waits for the index to be definitely closed.
    ///
    /// To avoid blocking, users should relinquish their locks to the IndexMap before calling this function.
    ///
    /// After the index is physically closed, the in memory map must still be updated to take this into account.
    /// To do so, a `ReopenableIndex` is returned, that can be used to either definitely close or definitely open
    /// the index without waiting anymore.
    pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
        self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
            uuid: self.uuid,
            map_size: self.map_size,
            generation: self.generation,
        })
    }
}

pub struct ReopenableIndex {
    uuid: Uuid,
    map_size: usize,
    generation: usize,
}

impl ReopenableIndex {
    /// Attempts to reopen the index, which can result in the index being reopened again or not
    /// (e.g. if another thread already opened and closed the index again).
    ///
    /// Use get again on the IndexMap to get the updated status.
    ///
    /// Fails if the underlying index creation fails.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status                                   |
    /// |-----------------|----------------------------------------------|
    /// | Missing         | Missing                                      |
    /// | BeingDeleted    | BeingDeleted                                 |
    /// | Closing         | Available or Closing depending on generation |
    /// | Available       | Available                                    |
    ///
    pub fn reopen(self, map: &mut IndexMap, path: &Path) -> Result<()> {
        if let Closing(reopen) = map.get(&self.uuid) {
            if reopen.generation != self.generation {
                return Ok(());
            }
            map.unavailable.remove(&self.uuid);
            map.create(&self.uuid, path, None, self.map_size)?;
        }
        Ok(())
    }

    /// Attempts to close the index, which may or may not result in the index being closed
    /// (e.g. if another thread already reopened the index again).
    ///
    /// Use get again on the IndexMap to get the updated status.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status                                 |
    /// |-----------------|--------------------------------------------|
    /// | Missing         | Missing                                    |
    /// | BeingDeleted    | BeingDeleted                               |
    /// | Closing         | Missing or Closing depending on generation |
    /// | Available       | Available                                  |
    pub fn close(self, map: &mut IndexMap) {
        if let Closing(reopen) = map.get(&self.uuid) {
            if reopen.generation != self.generation {
                return;
            }
            map.unavailable.remove(&self.uuid);
        }
    }
}

impl IndexMap {
    pub fn new(cap: usize) -> IndexMap {
        Self { unavailable: Default::default(), available: LruMap::new(cap), generation: 0 }
    }

    /// Gets the current status of an index in the map.
    ///
    /// If the index is available it can be accessed from the returned status.
    pub fn get(&self, uuid: &Uuid) -> IndexStatus {
        self.available
            .get(uuid)
            .map(|index| Available(index.clone()))
            .unwrap_or_else(|| self.get_unavailable(uuid))
    }

    fn get_unavailable(&self, uuid: &Uuid) -> IndexStatus {
        match self.unavailable.get(uuid) {
            Some(Some(reopen)) => Closing(reopen.clone()),
            Some(None) => BeingDeleted,
            None => Missing,
        }
    }

    /// Attempts to create a new index that wasn't existing before.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status |
    /// |-----------------|------------|
    /// | Missing         | Available  |
    /// | BeingDeleted    | panics     |
    /// | Closing         | panics     |
    /// | Available       | panics     |
    ///
    pub fn create(
        &mut self,
        uuid: &Uuid,
        path: &Path,
        date: Option<(OffsetDateTime, OffsetDateTime)>,
        map_size: usize,
    ) -> Result<Index> {
        if !matches!(self.get_unavailable(uuid), Missing) {
            panic!("Attempt to open an index that was unavailable");
        }
        let index = create_or_open_index(path, date, map_size)?;
        match self.available.insert(*uuid, index.clone()) {
            InsertionOutcome::InsertedNew => (),
            InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
                self.close(evicted_uuid, evicted_index, 0);
            }
            InsertionOutcome::Replaced(_) => {
                panic!("Attempt to open an index that was already opened")
            }
        }
        Ok(index)
    }

    /// Increases the current generation. See documentation for this field.
    ///
    /// In the unlikely event that the 2^64 generations would have been exhausted, we simply wrap-around.
    ///
    /// For this to cause an issue, one should be able to stop a reader in time after it got a `ReopenableIndex` and before it takes the lock
    /// to remove it from the unavailable map, and keep the reader in this frozen state for 2^64 closing of other indexes.
    ///
    /// This seems overwhelmingly impossible to achieve in practice.
    fn next_generation(&mut self) -> usize {
        self.generation = self.generation.wrapping_add(1);
        self.generation
    }

    /// Attempts to close an index.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status   |
    /// |-----------------|--------------|
    /// | Missing         | Missing      |
    /// | BeingDeleted    | BeingDeleted |
    /// | Closing         | Closing      |
    /// | Available       | Closing      |
    ///
    pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
        let Some(index) = self.available.remove(uuid) else { return; };
        self.close(*uuid, index, map_size_growth);
    }

    fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
        let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
        let closing_event = index.prepare_for_closing();
        let generation = self.next_generation();
        self.unavailable
            .insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
    }

    /// Attempts to delete and index.
    ///
    /// `end_deletion` must be called just after.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status   | Return value                |
    /// |-----------------|--------------|-----------------------------|
    /// | Missing         | BeingDeleted | Ok(None)                    |
    /// | BeingDeleted    | BeingDeleted | Err(None)                   |
    /// | Closing         | Closing      | Err(Some(reopen))           |
    /// | Available       | BeingDeleted | Ok(Some(env_closing_event)) |
    pub fn start_deletion(
        &mut self,
        uuid: &Uuid,
    ) -> std::result::Result<Option<EnvClosingEvent>, Option<ClosingIndex>> {
        if let Some(index) = self.available.remove(uuid) {
            self.unavailable.insert(*uuid, None);
            return Ok(Some(index.prepare_for_closing()));
        }
        match self.unavailable.remove(uuid) {
            Some(Some(reopen)) => Err(Some(reopen)),
            Some(None) => Err(None),
            None => Ok(None),
        }
    }

    /// Marks that an index deletion finished.
    ///
    /// Must be used after calling `start_deletion`.
    ///
    /// # Status table
    ///
    /// | Previous Status | New Status |
    /// |-----------------|------------|
    /// | Missing         | Missing    |
    /// | BeingDeleted    | Missing    |
    /// | Closing         | panics     |
    /// | Available       | panics     |
    pub fn end_deletion(&mut self, uuid: &Uuid) {
        assert!(
            self.available.get(uuid).is_none(),
            "Attempt to finish deletion of an index that was not being deleted"
        );
        // Do not panic if the index was Missing or BeingDeleted
        assert!(
            !matches!(self.unavailable.remove(uuid), Some(Some(_))),
            "Attempt to finish deletion of an index that was being closed"
        );
    }
}

/// Create or open an index in the specified path.
/// The path *must* exist or an error will be thrown.
fn create_or_open_index(
    path: &Path,
    date: Option<(OffsetDateTime, OffsetDateTime)>,
    map_size: usize,
) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    options.map_size(clamp_to_page_size(map_size));
    options.max_readers(1024);

    if let Some((created, updated)) = date {
        Ok(Index::new_with_creation_dates(options, path, created, updated)?)
    } else {
        Ok(Index::new(options, path)?)
    }
}

/// Putting the tests of the LRU down there so we have access to the cache's private members
#[cfg(test)]
mod tests {

    use meilisearch_types::heed::Env;
    use meilisearch_types::Index;
    use uuid::Uuid;

    use super::super::IndexMapper;
    use crate::tests::IndexSchedulerHandle;
    use crate::utils::clamp_to_page_size;
    use crate::IndexScheduler;

    impl IndexMapper {
        fn test() -> (Self, Env, IndexSchedulerHandle) {
            let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
            (index_scheduler.index_mapper, index_scheduler.env, handle)
        }
    }

    fn check_first_unavailable(mapper: &IndexMapper, expected_uuid: Uuid, is_closing: bool) {
        let index_map = mapper.index_map.read().unwrap();
        let (uuid, state) = index_map.unavailable.first_key_value().unwrap();
        assert_eq!(uuid, &expected_uuid);
        assert_eq!(state.is_some(), is_closing);
    }

    #[test]
    fn evict_indexes() {
        let (mapper, env, _handle) = IndexMapper::test();
        let mut uuids = vec![];
        // LRU cap + 1
        for i in 0..(5 + 1) {
            let index_name = format!("index-{i}");
            let wtxn = env.write_txn().unwrap();
            mapper.create_index(wtxn, &index_name, None).unwrap();
            let txn = env.read_txn().unwrap();
            uuids.push(mapper.index_mapping.get(&txn, &index_name).unwrap().unwrap());
        }
        // index-0 was evicted
        check_first_unavailable(&mapper, uuids[0], true);

        // get back the evicted index
        let wtxn = env.write_txn().unwrap();
        mapper.create_index(wtxn, "index-0", None).unwrap();

        // Least recently used is now index-1
        check_first_unavailable(&mapper, uuids[1], true);
    }

    #[test]
    fn resize_index() {
        let (mapper, env, _handle) = IndexMapper::test();
        let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
        assert_index_size(index, mapper.index_base_map_size);

        mapper.resize_index(&env.read_txn().unwrap(), "index").unwrap();

        let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
        assert_index_size(index, mapper.index_base_map_size + mapper.index_growth_amount);

        mapper.resize_index(&env.read_txn().unwrap(), "index").unwrap();

        let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
        assert_index_size(index, mapper.index_base_map_size + mapper.index_growth_amount * 2);
    }

    fn assert_index_size(index: Index, expected: usize) {
        let expected = clamp_to_page_size(expected);
        let index_map_size = index.map_size().unwrap();
        assert_eq!(index_map_size, expected);
    }
}
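
The `generation` field documented in the new file above exists so that a "late" waiter cannot remove a map entry that now belongs to a newer close request. Below is a minimal, self-contained sketch of that idea using only std types; all names are illustrative and unrelated to the real `IndexMap` internals.

use std::collections::BTreeMap;

struct Closing {
    generation: usize,
}

struct Map {
    unavailable: BTreeMap<u32, Closing>,
    generation: usize,
}

impl Map {
    /// Record a close request and return the generation the waiter should remember.
    fn close(&mut self, id: u32) -> usize {
        self.generation = self.generation.wrapping_add(1);
        self.unavailable.insert(id, Closing { generation: self.generation });
        self.generation
    }

    /// Called by a waiter once the close it observed has finished.
    fn finish_close(&mut self, id: u32, observed_generation: usize) {
        if let Some(closing) = self.unavailable.get(&id) {
            // A newer close request superseded the one this waiter observed:
            // leave the entry alone and let the newer waiter handle it.
            if closing.generation != observed_generation {
                return;
            }
            self.unavailable.remove(&id);
        }
    }
}

fn main() {
    let mut map = Map { unavailable: BTreeMap::new(), generation: 0 };
    let stale = map.close(1); // first close request
    let fresh = map.close(1); // the index was reopened and closed again
    map.finish_close(1, stale); // stale waiter: no effect
    assert!(map.unavailable.contains_key(&1));
    map.finish_close(1, fresh); // current waiter removes the entry
    assert!(!map.unavailable.contains_key(&1));
}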
@@ -1,21 +1,22 @@
-use std::collections::hash_map::Entry;
-use std::collections::HashMap;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 use std::sync::{Arc, RwLock};
+use std::time::Duration;
 use std::{fs, thread};
 
 use log::error;
 use meilisearch_types::heed::types::Str;
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::Index;
-use synchronoise::SignalEvent;
 use time::OffsetDateTime;
 use uuid::Uuid;
 
-use self::IndexStatus::{Available, BeingDeleted, BeingResized};
+use self::index_map::IndexMap;
+use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
 use crate::uuid_codec::UuidCodec;
-use crate::{clamp_to_page_size, Error, Result};
+use crate::{Error, Result};
+
+mod index_map;
 
 const INDEX_MAPPING: &str = "index-mapping";
 
@@ -26,17 +27,38 @@ const INDEX_MAPPING: &str = "index-mapping";
 /// 2. Opening indexes and storing references to these opened indexes
 /// 3. Accessing indexes through their uuid
 /// 4. Mapping a user-defined name to each index uuid.
+///
+/// # Implementation notes
+///
+/// An index exists as 3 bits of data:
+/// 1. The index data on disk, that can exist in 3 states: Missing, Present, or BeingDeleted.
+/// 2. The persistent database containing the association between the index' name and its UUID,
+///    that can exist in 2 states: Missing or Present.
+/// 3. The state of the index in the in-memory `IndexMap`, that can exist in multiple states:
+///    - Missing
+///    - Available
+///    - Closing (because an index needs resizing or was evicted from the cache)
+///    - BeingDeleted
+///
+/// All of this data should be kept consistent between index operations, which is achieved by the `IndexMapper`
+/// with the use of the following primitives:
+/// - A RwLock on the `IndexMap`.
+/// - Transactions on the association database.
+/// - ClosingEvent signals emitted when closing an environment.
 #[derive(Clone)]
 pub struct IndexMapper {
     /// Keep track of the opened indexes. Used mainly by the index resolver.
-    index_map: Arc<RwLock<HashMap<Uuid, IndexStatus>>>,
+    index_map: Arc<RwLock<IndexMap>>,
 
     /// Map an index name with an index uuid currently available on disk.
     pub(crate) index_mapping: Database<Str, UuidCodec>,
 
     /// Path to the folder where the LMDB environments of each index are.
     base_path: PathBuf,
-    index_size: usize,
+    /// The map size an index is opened with on the first time.
+    index_base_map_size: usize,
+    /// The quantity by which the map size of an index is incremented upon reopening, in bytes.
+    index_growth_amount: usize,
     pub indexer_config: Arc<IndexerConfig>,
 }
 
@@ -44,10 +66,12 @@ pub struct IndexMapper {
 #[allow(clippy::large_enum_variant)]
 #[derive(Clone)]
 pub enum IndexStatus {
+    /// Not currently in the index map.
+    Missing,
     /// Do not insert it back in the index map as it is currently being deleted.
     BeingDeleted,
-    /// Temporarily do not insert the index in the index map as it is currently being resized.
-    BeingResized(Arc<SignalEvent>),
+    /// Temporarily do not insert the index in the index map as it is currently being resized/evicted from the map.
+    Closing(index_map::ClosingIndex),
     /// You can use the index without worrying about anything.
     Available(Index),
 }
@@ -56,37 +80,21 @@ impl IndexMapper {
     pub fn new(
         env: &Env,
         base_path: PathBuf,
-        index_size: usize,
+        index_base_map_size: usize,
+        index_growth_amount: usize,
+        index_count: usize,
         indexer_config: IndexerConfig,
     ) -> Result<Self> {
         Ok(Self {
-            index_map: Arc::default(),
+            index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
             index_mapping: env.create_database(Some(INDEX_MAPPING))?,
             base_path,
-            index_size,
+            index_base_map_size,
+            index_growth_amount,
             indexer_config: Arc::new(indexer_config),
         })
     }
 
-    /// Create or open an index in the specified path.
-    /// The path *must* exists or an error will be thrown.
-    fn create_or_open_index(
-        &self,
-        path: &Path,
-        date: Option<(OffsetDateTime, OffsetDateTime)>,
-        map_size: usize,
-    ) -> Result<Index> {
-        let mut options = EnvOpenOptions::new();
-        options.map_size(clamp_to_page_size(map_size));
-        options.max_readers(1024);
-
-        if let Some((created, updated)) = date {
-            Ok(Index::new_with_creation_dates(options, path, created, updated)?)
-        } else {
-            Ok(Index::new(options, path)?)
-        }
-    }
-
     /// Get or create the index.
     pub fn create_index(
         &self,
@@ -106,16 +114,17 @@ impl IndexMapper {
                 let index_path = self.base_path.join(uuid.to_string());
                 fs::create_dir_all(&index_path)?;
 
-                let index = self.create_or_open_index(&index_path, date, self.index_size)?;
-
-                wtxn.commit()?;
                 // Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
                 // This is very unlikely to happen in practice.
                 // TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
-                if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
-                {
-                    panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
-                }
+                let index = self.index_map.write().unwrap().create(
+                    &uuid,
+                    &index_path,
+                    date,
+                    self.index_base_map_size,
+                )?;
+
+                wtxn.commit()?;
 
                 Ok(index)
             }
@@ -135,23 +144,42 @@ impl IndexMapper {
         assert!(self.index_mapping.delete(&mut wtxn, name)?);
 
         wtxn.commit()?;
-        // We remove the index from the in-memory index map.
+
+        let mut tries = 0;
+        // Attempts to remove the index from the in-memory index map in a loop.
+        //
+        // If the index is currently being closed, we will wait for it to be closed and retry getting it in a subsequent
+        // loop iteration.
+        //
+        // We make 100 attempts before giving up.
+        // This could happen in the following situations:
+        //
+        // 1. There is a bug preventing the index from being correctly closed, or us from detecting this.
+        // 2. A user of the index is keeping it open for more than 600 seconds. This could happen e.g. during a pathological search.
+        //    This can not be caused by indexation because deleting an index happens in the scheduler itself, so cannot be concurrent with indexation.
+        //
+        // In these situations, reporting the error through a panic is in order.
         let closing_event = loop {
             let mut lock = self.index_map.write().unwrap();
-            let resize_operation = match lock.insert(uuid, BeingDeleted) {
-                Some(Available(index)) => break Some(index.prepare_for_closing()),
-                // The target index is in the middle of a resize operation.
-                // Wait for this operation to complete, then try again.
-                Some(BeingResized(resize_operation)) => resize_operation.clone(),
-                // The index is already being deleted or doesn't exist.
-                // It's OK to remove it from the map again.
-                _ => break None,
-            };
-
-            // Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
-            // will involve operations on the very map we're locking.
-            drop(lock);
-            resize_operation.wait();
+            match lock.start_deletion(&uuid) {
+                Ok(env_closing) => break env_closing,
+                Err(Some(reopen)) => {
+                    // drop the lock here so that we don't synchronously wait for the index to close.
+                    drop(lock);
+                    tries += 1;
+                    if tries >= 100 {
+                        panic!("Too many attempts to close index {name} prior to deletion.")
+                    }
+                    let reopen = if let Some(reopen) = reopen.wait_timeout(Duration::from_secs(6)) {
+                        reopen
+                    } else {
+                        continue;
+                    };
+                    reopen.close(&mut self.index_map.write().unwrap());
+                    continue;
+                }
+                Err(None) => return Ok(()),
+            }
         };
 
         let index_map = self.index_map.clone();
@@ -161,7 +189,7 @@ impl IndexMapper {
             .name(String::from("index_deleter"))
             .spawn(move || {
                 // We first wait to be sure that the previously opened index is effectively closed.
-                // This can take a lot of time, this is why we do that in a seperate thread.
+                // This can take a lot of time, this is why we do that in a separate thread.
                 if let Some(closing_event) = closing_event {
                     closing_event.wait();
                 }
@@ -175,7 +203,7 @@ impl IndexMapper {
                 }
 
                 // Finally we remove the entry from the index map.
-                assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted)));
+                index_map.write().unwrap().end_deletion(&uuid);
             })
             .unwrap();
 
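The loop added above (and the very similar one added to `index()` further down) follows a single pattern: wait on a closing signal with a short timeout, retry, and give up with a panic after 100 attempts. Here is a rough, standalone sketch of that pattern using only std primitives; the condvar-based signalling is an assumption made for the illustration and is not how `ClosingIndex` is actually implemented.

use std::sync::{Arc, Condvar, Mutex};
use std::time::Duration;

/// Illustrative only: block until `closed` becomes true, retrying with a timeout
/// a bounded number of times before giving up loudly.
fn wait_until_closed(closed: &Arc<(Mutex<bool>, Condvar)>, what: &str) {
    let (flag, signal) = &**closed;
    let mut tries = 0;
    loop {
        tries += 1;
        if tries > 100 {
            panic!("Too many attempts to close {what}");
        }
        let guard = flag.lock().unwrap();
        if *guard {
            return; // already closed, nothing to wait for
        }
        // wait at most 6 seconds, then re-check the flag and loop
        let (guard, _timed_out) = signal.wait_timeout(guard, Duration::from_secs(6)).unwrap();
        if *guard {
            return;
        }
    }
}

fn main() {
    let closed = Arc::new((Mutex::new(false), Condvar::new()));
    let closer = Arc::clone(&closed);
    std::thread::spawn(move || {
        std::thread::sleep(Duration::from_millis(50));
        *closer.0.lock().unwrap() = true;
        closer.1.notify_all();
    });
    wait_until_closed(&closed, "index \"doggo\"");
}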
@@ -195,76 +223,15 @@ impl IndexMapper {
     /// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
     /// in memory hash map.
     pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
-        // fixme: factor to a function?
         let uuid = self
             .index_mapping
             .get(rtxn, name)?
             .ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
 
         // We remove the index from the in-memory index map.
-        let mut lock = self.index_map.write().unwrap();
-        // signal that will be sent when the resize operation completes
-        let resize_operation = Arc::new(SignalEvent::manual(false));
-        let index = match lock.insert(uuid, BeingResized(resize_operation)) {
-            Some(Available(index)) => index,
-            Some(previous_status) => {
-                lock.insert(uuid, previous_status);
-                panic!(
-                    "Attempting to resize index {name} that is already being resized or deleted."
-                )
-            }
-            None => {
-                panic!("Could not find the status of index {name} in the in-memory index mapper.")
-            }
-        };
-
-        drop(lock);
-
-        let resize_succeeded = (move || {
-            let current_size = index.map_size()?;
-            let new_size = current_size * 2;
-            let closing_event = index.prepare_for_closing();
-
-            log::debug!("Waiting for index {name} to close");
-
-            if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
-                // fail after 10 minutes waiting
-                panic!("Could not resize index {name} (unable to close it)");
-            }
-
-            log::info!("Resized index {name} from {current_size} to {new_size} bytes");
-            let index_path = self.base_path.join(uuid.to_string());
-            let index = self.create_or_open_index(&index_path, None, new_size)?;
-            Ok(index)
-        })();
-
-        // Put the map back to a consistent state.
-        // Even if there was an error we don't want to leave the map in an inconsistent state as it would cause
-        // deadlocks.
-        let mut lock = self.index_map.write().unwrap();
-        let (resize_operation, resize_succeeded) = match resize_succeeded {
-            Ok(index) => {
-                // insert the resized index
-                let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
-                    panic!("Index state for index {name} was modified while it was being resized")
-                };
-
-                (resize_operation, Ok(()))
-            }
-            Err(error) => {
-                // there was an error, not much we can do... delete the index from the in-memory map to prevent future errors
-                let Some(BeingResized(resize_operation)) = lock.remove(&uuid) else {
-                    panic!("Index state for index {name} was modified while it was being resized")
-                };
-                (resize_operation, Err(error))
-            }
-        };
-
-        // drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
-        drop(lock);
-        resize_operation.signal();
-
-        resize_succeeded
+        self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);
+
+        Ok(())
     }
 
     /// Return an index, may open it if it wasn't already opened.
@@ -274,47 +241,68 @@ impl IndexMapper {
             .get(rtxn, name)?
             .ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
 
-        // we clone here to drop the lock before entering the match
+        let mut tries = 0;
+        // attempts to open the index in a loop.
+        //
+        // If the index is currently being closed, we will wait for it to be closed and retry getting it in a subsequent
+        // loop iteration.
+        //
+        // We make 100 attempts before giving up.
+        // This could happen in the following situations:
+        //
+        // 1. There is a bug preventing the index from being correctly closed, or us from detecting it was.
+        // 2. A user of the index is keeping it open for more than 600 seconds. This could happen e.g. during a long indexation,
+        //    a pathological search, and so on.
+        //
+        // In these situations, reporting the error through a panic is in order.
         let index = loop {
-            let index = self.index_map.read().unwrap().get(&uuid).cloned();
+            tries += 1;
+            if tries > 100 {
+                panic!("Too many spurious wake ups while trying to open the index {name}");
+            }
+
+            // we get the index here to drop the lock before entering the match
+            let index = self.index_map.read().unwrap().get(&uuid);
 
             match index {
-                Some(Available(index)) => break index,
-                Some(BeingResized(ref resize_operation)) => {
+                Available(index) => break index,
+                Closing(reopen) => {
                     // Avoiding deadlocks: no lock taken while doing this operation.
-                    resize_operation.wait();
+                    let reopen = if let Some(reopen) = reopen.wait_timeout(Duration::from_secs(6)) {
+                        reopen
+                    } else {
+                        continue;
+                    };
+                    let index_path = self.base_path.join(uuid.to_string());
+                    // take the lock to reopen the environment.
+                    reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
                     continue;
                 }
-                Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
+                BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
                 // since we're lazy, it's possible that the index has not been opened yet.
-                None => {
+                Missing => {
                     let mut index_map = self.index_map.write().unwrap();
                     // between the read lock and the write lock it's not impossible
-                    // that someone already opened the index (eg if two search happens
+                    // that someone already opened the index (eg if two searches happen
                     // at the same time), thus before opening it we check a second time
                     // if it's not already there.
-                    // Since there is a good chance it's not already there we can use
-                    // the entry method.
-                    match index_map.entry(uuid) {
-                        Entry::Vacant(entry) => {
+                    match index_map.get(&uuid) {
+                        Missing => {
                             let index_path = self.base_path.join(uuid.to_string());
 
-                            let index =
-                                self.create_or_open_index(&index_path, None, self.index_size)?;
-                            entry.insert(Available(index.clone()));
-                            break index;
+                            break index_map.create(
+                                &uuid,
+                                &index_path,
+                                None,
+                                self.index_base_map_size,
+                            )?;
                         }
-                        Entry::Occupied(entry) => match entry.get() {
-                            Available(index) => break index.clone(),
-                            BeingResized(resize_operation) => {
-                                // Avoiding the deadlock: we drop the lock before waiting
-                                let resize_operation = resize_operation.clone();
-                                drop(index_map);
-                                resize_operation.wait();
-                                continue;
-                            }
-                            BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
-                        },
+                        Available(index) => break index,
+                        Closing(_) => {
+                            // the reopening will be handled in the next loop operation
+                            continue;
+                        }
+                        BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
                     }
                 }
             }
@@ -323,18 +311,38 @@ impl IndexMapper {
         Ok(index)
     }
 
-    /// Return all indexes, may open them if they weren't already opened.
-    pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<(String, Index)>> {
+    /// Attempts `f` for each index that exists in the index mapper.
+    ///
+    /// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened,
+    /// which is unsupported in general.
+    ///
+    /// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of
+    /// all the indexes, but this function makes it harder and so less likely to do accidentally.
+    pub fn try_for_each_index<U, V>(
+        &self,
+        rtxn: &RoTxn,
+        mut f: impl FnMut(&str, &Index) -> Result<U>,
+    ) -> Result<V>
+    where
+        V: FromIterator<U>,
+    {
         self.index_mapping
             .iter(rtxn)?
-            .map(|ret| {
-                ret.map_err(Error::from).and_then(|(name, _)| {
-                    self.index(rtxn, name).map(|index| (name.to_string(), index))
-                })
+            .map(|res| {
+                res.map_err(Error::from)
+                    .and_then(|(name, _)| self.index(rtxn, name).and_then(|index| f(name, &index)))
             })
             .collect()
     }
 
+    /// Return the name of all indexes without opening them.
+    pub fn index_names(&self, rtxn: &RoTxn) -> Result<Vec<String>> {
+        self.index_mapping
+            .iter(rtxn)?
+            .map(|res| res.map_err(Error::from).map(|(name, _)| name.to_string()))
+            .collect()
+    }
+
     /// Swap two index names.
     pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
         let lhs_uuid = self
@@ -254,6 +254,6 @@ pub fn snapshot_canceled_by(
     snap
 }
 pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
-    let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::<Vec<_>>();
+    let names = mapper.index_names(rtxn).unwrap();
     format!("{names:?}")
 }
@@ -24,6 +24,7 @@ pub mod error;
 mod index_mapper;
 #[cfg(test)]
 mod insta_snapshot;
+mod lru;
 mod utils;
 mod uuid_codec;
 
@@ -31,7 +32,7 @@ pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
 
 use std::ops::{Bound, RangeBounds};
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering::Relaxed;
 use std::sync::{Arc, RwLock};
@@ -43,7 +44,6 @@ use file_store::FileStore;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{self, Database, Env, RoTxn};
-use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::milli;
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
@@ -230,8 +230,12 @@ pub struct IndexSchedulerOptions {
     pub dumps_path: PathBuf,
     /// The maximum size, in bytes, of the task index.
     pub task_db_size: usize,
-    /// The maximum size, in bytes, of each meilisearch index.
-    pub index_size: usize,
+    /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
+    pub index_base_map_size: usize,
+    /// The size, in bytes, by which the map size of an index is increased when it resized due to being full.
+    pub index_growth_amount: usize,
+    /// The number of indexes that can be concurrently opened in memory.
+    pub index_count: usize,
     /// Configuration used during indexing for each meilisearch index.
     pub indexer_config: IndexerConfig,
     /// Set to `true` iff the index scheduler is allowed to automatically
@@ -361,9 +365,25 @@ impl IndexScheduler {
         std::fs::create_dir_all(&options.indexes_path)?;
         std::fs::create_dir_all(&options.dumps_path)?;
 
+        let task_db_size = clamp_to_page_size(options.task_db_size);
+        let budget = if options.indexer_config.skip_index_budget {
+            IndexBudget {
+                map_size: options.index_base_map_size,
+                index_count: options.index_count,
+                task_db_size,
+            }
+        } else {
+            Self::index_budget(
+                &options.tasks_path,
+                options.index_base_map_size,
+                task_db_size,
+                options.index_count,
+            )
+        };
+
         let env = heed::EnvOpenOptions::new()
             .max_dbs(10)
-            .map_size(clamp_to_page_size(options.task_db_size))
+            .map_size(budget.task_db_size)
             .open(options.tasks_path)?;
         let file_store = FileStore::new(&options.update_file_path)?;
 
@@ -383,7 +403,9 @@ impl IndexScheduler {
             index_mapper: IndexMapper::new(
                 &env,
                 options.indexes_path,
-                options.index_size,
+                budget.map_size,
+                options.index_growth_amount,
+                budget.index_count,
                 options.indexer_config,
             )?,
             env,
@@ -407,6 +429,75 @@ impl IndexScheduler {
         Ok(this)
     }
 
+    fn index_budget(
+        tasks_path: &Path,
+        base_map_size: usize,
+        mut task_db_size: usize,
+        max_index_count: usize,
+    ) -> IndexBudget {
+        #[cfg(windows)]
+        const DEFAULT_BUDGET: usize = 6 * 1024 * 1024 * 1024 * 1024; // 6 TiB, 1 index
+        #[cfg(not(windows))]
+        const DEFAULT_BUDGET: usize = 80 * 1024 * 1024 * 1024 * 1024; // 80 TiB, 18 indexes
+
+        let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
+            DEFAULT_BUDGET
+        } else {
+            log::debug!("determining budget with dichotomic search");
+            utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
+                Self::is_good_heed(tasks_path, map_size)
+            })
+        };
+
+        log::debug!("memmap budget: {budget}B");
+        let mut budget = budget / 2;
+        if task_db_size > (budget / 2) {
+            task_db_size = clamp_to_page_size(budget * 2 / 5);
+            log::debug!(
+                "Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
+            );
+        }
+        budget -= task_db_size;
+
+        // won't be mutated again
+        let budget = budget;
+        let task_db_size = task_db_size;
+
+        log::debug!("index budget: {budget}B");
+        let mut index_count = budget / base_map_size;
+        if index_count < 2 {
+            // take a bit less than half than the budget to make sure we can always afford to open an index
+            let map_size = (budget * 2) / 5;
+            // single index of max budget
+            log::debug!("1 index of {map_size}B can be opened simultaneously.");
+            return IndexBudget { map_size, index_count: 1, task_db_size };
+        }
+        // give us some space for an additional index when the cache is already full
+        // decrement is OK because index_count >= 2.
+        index_count -= 1;
+        if index_count > max_index_count {
+            index_count = max_index_count;
+        }
+        log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
+        IndexBudget { map_size: base_map_size, index_count, task_db_size }
+    }
+
+    fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool {
+        if let Ok(env) =
+            heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path)
+        {
+            env.prepare_for_closing().wait();
+            true
+        } else {
+            // We're treating all errors equally here, not only allocation errors.
+            // This means there's a possiblity for the budget to lower due to errors different from allocation errors.
+            // For persistent errors, this is OK as long as the task db is then reopened normally without ignoring the error this time.
+            // For transient errors, this could lead to an instance with too low a budget.
+            // However transient errors are: 1) less likely than persistent errors 2) likely to cause other issues down the line anyway.
+            false
+        }
+    }
+
     pub fn read_txn(&self) -> Result<RoTxn> {
         self.env.read_txn().map_err(|e| e.into())
     }
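For a sense of the arithmetic in `index_budget`: when the 80 TiB probe succeeds, that budget is halved, the task DB size is subtracted, and the remainder divided by the base map size gives the index count, decremented once and capped by the configured `index_count`. When the probe fails, `utils::dichotomic_search` hunts for a workable size instead. The bisection below is only a sketch of that idea, under the assumption that a known-good lower bound and a known-bad upper bound exist; it is not the actual helper.

/// Illustrative only: bisect for the largest map size that still mmaps successfully.
/// Assumes `is_good(lo)` holds, `is_good(hi)` does not, and `is_good` is monotone.
fn largest_good(mut lo: usize, mut hi: usize, is_good: impl Fn(usize) -> bool) -> usize {
    const PRECISION: usize = 1024 * 1024; // stop once the window is under 1 MiB
    while hi - lo > PRECISION {
        let mid = lo + (hi - lo) / 2;
        if is_good(mid) {
            lo = mid; // mid still works: raise the lower bound
        } else {
            hi = mid; // mid fails: lower the upper bound
        }
    }
    lo
}

fn main() {
    // pretend the OS refuses to map more than ~10 GiB of address space,
    // and bisect between 1 byte and an optimistic 80 GiB upper bound
    let os_limit = 10 * 1024 * 1024 * 1024_usize;
    let budget = largest_good(1, 80 * 1024 * 1024 * 1024, |size| size <= os_limit);
    assert!(budget <= os_limit && os_limit - budget < 1024 * 1024);
    println!("usable budget: {budget} bytes");
}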
@@ -460,15 +551,42 @@ impl IndexScheduler {
     ///
     /// * If the index wasn't opened before, the index will be opened.
     /// * If the index doesn't exist on disk, the `IndexNotFoundError` is thrown.
+    ///
+    /// ### Note
+    ///
+    /// As an `Index` requires a large swath of the virtual memory address space, correct usage of an `Index` does not
+    /// keep its handle for too long.
+    ///
+    /// Some configurations also can't reasonably open multiple indexes at once.
+    /// If you need to fetch information from or perform an action on all indexes,
+    /// see the `try_for_each_index` function.
     pub fn index(&self, name: &str) -> Result<Index> {
         let rtxn = self.env.read_txn()?;
         self.index_mapper.index(&rtxn, name)
     }
 
-    /// Return and open all the indexes.
-    pub fn indexes(&self) -> Result<Vec<(String, Index)>> {
+    /// Return the name of all indexes without opening them.
+    pub fn index_names(self) -> Result<Vec<String>> {
         let rtxn = self.env.read_txn()?;
-        self.index_mapper.indexes(&rtxn)
+        self.index_mapper.index_names(&rtxn)
+    }
+
+    /// Attempts `f` for each index that exists known to the index scheduler.
+    ///
+    /// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened,
+    /// which is unsupported in general.
+    ///
+    /// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of
+    /// all the indexes, but this function makes it harder and so less likely to do accidentally.
+    ///
+    /// If many indexes exist, this operation can take time to complete (in the order of seconds for a 1000 of indexes) as it needs to open
+    /// all the indexes.
+    pub fn try_for_each_index<U, V>(&self, f: impl FnMut(&str, &Index) -> Result<U>) -> Result<V>
+    where
+        V: FromIterator<U>,
+    {
+        let rtxn = self.env.read_txn()?;
+        self.index_mapper.try_for_each_index(&rtxn, f)
     }
 
     /// Return the task ids matched by the given query from the index scheduler's point of view.
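One possible use of the new `IndexScheduler::try_for_each_index`, sketched under the assumption that the crate's public `Result` type and error conversions cover `Index::map_size` (the rest of this diff relies on that call in the same way); the helper name `print_map_sizes` is made up for the example.

use index_scheduler::{IndexScheduler, Result};

/// Illustrative only: gather one small datum per index without keeping
/// every `Index` handle alive at once.
fn print_map_sizes(scheduler: &IndexScheduler) -> Result<()> {
    let sizes: Vec<(String, usize)> = scheduler.try_for_each_index(|name, index| {
        // map_size() only inspects the LMDB environment of the already-open index
        Ok((name.to_string(), index.map_size()?))
    })?;
    for (name, size) in sizes {
        println!("{name}: map size of {size} bytes");
    }
    Ok(())
}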
@@ -630,13 +748,13 @@ impl IndexScheduler {
         &self,
         rtxn: &RoTxn,
         query: &Query,
-        authorized_indexes: &Option<Vec<IndexUidPattern>>,
+        filters: &meilisearch_auth::AuthFilter,
     ) -> Result<RoaringBitmap> {
         let mut tasks = self.get_task_ids(rtxn, query)?;
 
         // If the query contains a list of index uid or there is a finite list of authorized indexes,
         // then we must exclude all the kinds that aren't associated to one and only one index.
-        if query.index_uids.is_some() || authorized_indexes.is_some() {
+        if query.index_uids.is_some() || !filters.all_indexes_authorized() {
             for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
                 tasks -= self.get_kind(rtxn, kind)?;
             }
@@ -644,11 +762,11 @@ impl IndexScheduler {
 
         // Any task that is internally associated with a non-authorized index
         // must be discarded.
-        if let Some(authorized_indexes) = authorized_indexes {
+        if !filters.all_indexes_authorized() {
             let all_indexes_iter = self.index_tasks.iter(rtxn)?;
             for result in all_indexes_iter {
                 let (index, index_tasks) = result?;
-                if !authorized_indexes.iter().any(|p| p.matches_str(index)) {
+                if !filters.is_index_authorized(index) {
                     tasks -= index_tasks;
                 }
             }
@@ -668,12 +786,11 @@ impl IndexScheduler {
     pub fn get_tasks_from_authorized_indexes(
         &self,
         query: Query,
-        authorized_indexes: Option<Vec<IndexUidPattern>>,
+        filters: &meilisearch_auth::AuthFilter,
     ) -> Result<Vec<Task>> {
         let rtxn = self.env.read_txn()?;
 
-        let tasks =
-            self.get_task_ids_from_authorized_indexes(&rtxn, &query, &authorized_indexes)?;
+        let tasks = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?;
 
         let tasks = self.get_existing_tasks(
             &rtxn,
@@ -1111,6 +1228,16 @@ pub enum TickOutcome {
     WaitForSignal,
 }
 
+/// How many indexes we can afford to have open simultaneously.
+struct IndexBudget {
+    /// Map size of an index.
+    map_size: usize,
+    /// Maximum number of simultaneously opened indexes.
+    index_count: usize,
+    /// For very constrained systems we might need to reduce the base task_db_size so we can accept at least one index.
+    task_db_size: usize,
+}
+
 #[cfg(test)]
 mod tests {
     use std::io::{BufWriter, Seek, Write};
@@ -1120,7 +1247,9 @@ mod tests {
     use crossbeam::channel::RecvTimeoutError;
     use file_store::File;
     use meili_snap::snapshot;
+    use meilisearch_auth::AuthFilter;
     use meilisearch_types::document_formats::DocumentFormatError;
+    use meilisearch_types::index_uid_pattern::IndexUidPattern;
     use meilisearch_types::milli::obkv_to_json;
     use meilisearch_types::milli::update::IndexDocumentsMethod::{
         ReplaceDocuments, UpdateDocuments,
@@ -1154,6 +1283,8 @@ mod tests {
         let tempdir = TempDir::new().unwrap();
         let (sender, receiver) = crossbeam::channel::bounded(0);
 
+        let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
+
         let options = IndexSchedulerOptions {
             version_file_path: tempdir.path().join(VERSION_FILE_NAME),
             auth_path: tempdir.path().join("auth"),
@@ -1163,8 +1294,10 @@ mod tests {
             snapshots_path: tempdir.path().join("snapshots"),
             dumps_path: tempdir.path().join("dumps"),
             task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
-            index_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
-            indexer_config: IndexerConfig::default(),
+            index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
+            index_growth_amount: 1000 * 1000, // 1 MB
+            index_count: 5,
+            indexer_config,
             autobatching_enabled,
         };
 
@@ -2371,38 +2504,45 @@ mod tests {
 
         let rtxn = index_scheduler.env.read_txn().unwrap();
         let query = Query { limit: Some(0), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[]");
 
         let query = Query { limit: Some(1), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
 
         let query = Query { limit: Some(2), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
 
         let query = Query { from: Some(1), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
 
         let query = Query { from: Some(2), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
 
         let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[1,]");
 
         let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
     }
 
@@ -2427,21 +2567,24 @@ mod tests {
         let rtxn = index_scheduler.env.read_txn().unwrap();
 
         let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
 
         let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
 
         let query = Query {
             statuses: Some(vec![Status::Enqueued, Status::Processing]),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
 
         let query = Query {
@@ -2449,8 +2592,9 @@ mod tests {
             after_started_at: Some(start_time),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both enqueued and processing tasks in the first tick, but limited to those with a started_at
         // that comes after the start of the test, which should excludes the enqueued tasks
         snapshot!(snapshot_bitmap(&tasks), @"[0,]");
@@ -2460,8 +2604,9 @@ mod tests {
             before_started_at: Some(start_time),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both enqueued and processing tasks in the first tick, but limited to those with a started_at
         // that comes before the start of the test, which should excludes all of them
         snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2472,8 +2617,9 @@ mod tests {
             before_started_at: Some(start_time + Duration::minutes(1)),
             ..Default::default()
        };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both enqueued and processing tasks in the first tick, but limited to those with a started_at
         // that comes after the start of the test and before one minute after the start of the test,
         // which should exclude the enqueued tasks and include the only processing task
@@ -2498,8 +2644,9 @@ mod tests {
             before_started_at: Some(start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both succeeded and processing tasks in the first tick, but limited to those with a started_at
         // that comes after the start of the test and before one minute after the start of the test,
         // which should include all tasks
@@ -2510,8 +2657,9 @@ mod tests {
             before_started_at: Some(start_time),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both succeeded and processing tasks in the first tick, but limited to those with a started_at
         // that comes before the start of the test, which should exclude all tasks
         snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2522,8 +2670,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // both succeeded and processing tasks in the first tick, but limited to those with a started_at
         // that comes after the start of the second part of the test and before one minute after the
         // second start of the test, which should exclude all tasks
@@ -2541,8 +2690,9 @@ mod tests {
 
         let rtxn = index_scheduler.env.read_txn().unwrap();
 
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // we run the same query to verify that, and indeed find that the last task is matched
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
 
@@ -2552,8 +2702,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // enqueued, succeeded, or processing tasks started after the second part of the test, should
         // again only return the last task
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@@ -2563,8 +2714,9 @@ mod tests {
 
         // now the last task should have failed
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // so running the last query should return nothing
         snapshot!(snapshot_bitmap(&tasks), @"[]");
 
@@ -2574,8 +2726,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // but the same query on failed tasks should return the last task
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
 
@@ -2585,8 +2738,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // but the same query on failed tasks should return the last task
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
 
@@ -2597,8 +2751,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // same query but with an invalid uid
         snapshot!(snapshot_bitmap(&tasks), @"[]");
 
@@ -2609,8 +2764,9 @@ mod tests {
             before_started_at: Some(second_start_time + Duration::minutes(1)),
             ..Default::default()
         };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // same query but with a valid uid
         snapshot!(snapshot_bitmap(&tasks), @"[2,]");
     }
@@ -2640,8 +2796,9 @@ mod tests {
         let rtxn = index_scheduler.env.read_txn().unwrap();
 
         let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // only the first task associated with catto is returned, the indexSwap tasks are excluded!
         snapshot!(snapshot_bitmap(&tasks), @"[0,]");
 
@@ -2650,7 +2807,9 @@ mod tests {
             .get_task_ids_from_authorized_indexes(
                 &rtxn,
                 &query,
-                &Some(vec![IndexUidPattern::new_unchecked("doggo")]),
+                &AuthFilter::with_allowed_indexes(
+                    vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
+                ),
             )
             .unwrap();
         // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
@@ -2662,7 +2821,9 @@ mod tests {
             .get_task_ids_from_authorized_indexes(
                 &rtxn,
                 &query,
-                &Some(vec![IndexUidPattern::new_unchecked("doggo")]),
+                &AuthFilter::with_allowed_indexes(
+                    vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
+                ),
             )
             .unwrap();
         // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
@@ -2674,10 +2835,14 @@ mod tests {
             .get_task_ids_from_authorized_indexes(
                 &rtxn,
                 &query,
-                &Some(vec![
-                    IndexUidPattern::new_unchecked("catto"),
-                    IndexUidPattern::new_unchecked("doggo"),
-                ]),
+                &AuthFilter::with_allowed_indexes(
+                    vec![
+                        IndexUidPattern::new_unchecked("catto"),
+                        IndexUidPattern::new_unchecked("doggo"),
+                    ]
+                    .into_iter()
+                    .collect(),
+                ),
             )
             .unwrap();
         // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
@@ -2685,8 +2850,9 @@ mod tests {
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
 
         let query = Query::default();
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
+            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // we asked for all the tasks with all index authorized -> all tasks returned
         snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
     }
@@ -2717,8 +2883,9 @@ mod tests {
 
         let rtxn = index_scheduler.read_txn().unwrap();
         let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
-        let tasks =
-            index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+        let tasks = index_scheduler
            .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
+            .unwrap();
         // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
         // taskCancelation itself
         snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
@@ -2728,7 +2895,9 @@ mod tests {
             .get_task_ids_from_authorized_indexes(
|
.get_task_ids_from_authorized_indexes(
|
||||||
&rtxn,
|
&rtxn,
|
||||||
&query,
|
&query,
|
||||||
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
|
&AuthFilter::with_allowed_indexes(
|
||||||
|
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// Return only 1 because the user is not authorized to see task 2
|
// Return only 1 because the user is not authorized to see task 2
|
||||||
|
index-scheduler/src/lru.rs (new file, 203 lines)
@@ -0,0 +1,203 @@
+//! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
+
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Thread-safe `Vec`-backend LRU cache
+#[derive(Debug)]
+pub struct Lru<T> {
+    data: Vec<(AtomicU64, T)>,
+    generation: AtomicU64,
+    cap: usize,
+}
+
+impl<T> Lru<T> {
+    /// Creates a new LRU cache with the specified capacity.
+    ///
+    /// The capacity is allocated up-front, and will never change through a [`Self::put`] operation.
+    ///
+    /// # Panics
+    ///
+    /// - If the capacity is 0.
+    /// - If the capacity exceeds `isize::MAX` bytes.
+    pub fn new(cap: usize) -> Self {
+        assert_ne!(cap, 0, "The capacity of a cache cannot be 0");
+        Self {
+            // Note: since the element of the vector contains an AtomicU64, it is definitely not zero-sized so cap will never be usize::MAX.
+            data: Vec::with_capacity(cap),
+            generation: AtomicU64::new(0),
+            cap,
+        }
+    }
+
+    /// The capacity of this LRU cache, that is the maximum number of elements it can hold before evicting elements from the cache.
+    ///
+    /// The cache will contain at most this number of elements at any given time.
+    pub fn capacity(&self) -> usize {
+        self.cap
+    }
+
+    fn next_generation(&self) -> u64 {
+        // Acquire so this "happens-before" any potential store to a data cell (with Release ordering)
+        let generation = self.generation.fetch_add(1, Ordering::Acquire);
+        generation + 1
+    }
+
+    fn next_generation_mut(&mut self) -> u64 {
+        let generation = self.generation.get_mut();
+        *generation += 1;
+        *generation
+    }
+
+    /// Add a value in the cache, evicting an older value if necessary.
+    ///
+    /// If a value was evicted from the cache, it is returned.
+    ///
+    /// # Complexity
+    ///
+    /// - If the cache is full, then linear in the capacity.
+    /// - Otherwise constant.
+    pub fn put(&mut self, value: T) -> Option<T> {
+        // no need for a memory fence: we assume that whichever mechanism provides us synchronization
+        // (very probably, a RwLock) takes care of fencing for us.
+
+        let next_generation = self.next_generation_mut();
+        let evicted = if self.is_full() { self.pop() } else { None };
+        self.data.push((AtomicU64::new(next_generation), value));
+        evicted
+    }
+
+    /// Evict the oldest value from the cache.
+    ///
+    /// If the cache is empty, `None` will be returned.
+    ///
+    /// # Complexity
+    ///
+    /// - Linear in the capacity of the cache.
+    pub fn pop(&mut self) -> Option<T> {
+        // Don't use `Iterator::min_by_key` that provides shared references to its elements,
+        // so that we can get an exclusive one.
+        // This allows to handles the `AtomicU64`s as normal integers without using atomic instructions.
+        let mut min_generation_index = None;
+        for (index, (generation, _)) in self.data.iter_mut().enumerate() {
+            let generation = *generation.get_mut();
+            if let Some((_, min_generation)) = min_generation_index {
+                if min_generation > generation {
+                    min_generation_index = Some((index, generation));
+                }
+            } else {
+                min_generation_index = Some((index, generation))
+            }
+        }
+        min_generation_index.map(|(min_index, _)| self.data.swap_remove(min_index).1)
+    }
+
+    /// The current number of elements in the cache.
+    ///
+    /// This value is guaranteed to be less than or equal to [`Self::capacity`].
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Returns `true` if putting any additional element in the cache would cause the eviction of an element.
+    pub fn is_full(&self) -> bool {
+        self.len() == self.capacity()
+    }
+}
+
+pub struct LruMap<K, V>(Lru<(K, V)>);
+
+impl<K, V> LruMap<K, V>
+where
+    K: Eq,
+{
+    /// Creates a new LRU cache map with the specified capacity.
+    ///
+    /// The capacity is allocated up-front, and will never change through a [`Self::insert`] operation.
+    ///
+    /// # Panics
+    ///
+    /// - If the capacity is 0.
+    /// - If the capacity exceeds `isize::MAX` bytes.
+    pub fn new(cap: usize) -> Self {
+        Self(Lru::new(cap))
+    }
+
+    /// Gets a value in the cache map by its key.
+    ///
+    /// If no value matches, `None` will be returned.
+    ///
+    /// # Complexity
+    ///
+    /// - Linear in the capacity of the cache.
+    pub fn get(&self, key: &K) -> Option<&V> {
+        for (generation, (candidate, value)) in self.0.data.iter() {
+            if key == candidate {
+                generation.store(self.0.next_generation(), Ordering::Release);
+                return Some(value);
+            }
+        }
+        None
+    }
+
+    /// Gets a value in the cache map by its key.
+    ///
+    /// If no value matches, `None` will be returned.
+    ///
+    /// # Complexity
+    ///
+    /// - Linear in the capacity of the cache.
+    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+        let next_generation = self.0.next_generation_mut();
+        for (generation, (candidate, value)) in self.0.data.iter_mut() {
+            if key == candidate {
+                *generation.get_mut() = next_generation;
+                return Some(value);
+            }
+        }
+        None
+    }
+
+    /// Inserts a value in the cache map by its key, replacing any existing value and returning any evicted value.
+    ///
+    /// # Complexity
+    ///
+    /// - Linear in the capacity of the cache.
+    pub fn insert(&mut self, key: K, mut value: V) -> InsertionOutcome<K, V> {
+        match self.get_mut(&key) {
+            Some(old_value) => {
+                std::mem::swap(old_value, &mut value);
+                InsertionOutcome::Replaced(value)
+            }
+            None => match self.0.put((key, value)) {
+                Some((key, value)) => InsertionOutcome::Evicted(key, value),
+                None => InsertionOutcome::InsertedNew,
+            },
+        }
+    }
+
+    /// Removes an element from the cache map by its key, returning its value.
+    ///
+    /// Returns `None` if there was no element with this key in the cache.
+    ///
+    /// # Complexity
+    ///
+    /// - Linear in the capacity of the cache.
+    pub fn remove(&mut self, key: &K) -> Option<V> {
+        for (index, (_, (candidate, _))) in self.0.data.iter_mut().enumerate() {
+            if key == candidate {
+                return Some(self.0.data.swap_remove(index).1 .1);
+            }
+        }
+        None
+    }
+}
+
+/// The result of an insertion in a LRU map.
+pub enum InsertionOutcome<K, V> {
+    /// The key was not in the cache, the key-value pair has been inserted.
+    InsertedNew,
+    /// The key was not in the cache and an old key-value pair was evicted from the cache to make room for its insertions.
+    Evicted(K, V),
+    /// The key was already in the cache map, its value has been updated.
+    Replaced(V),
+}
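The new `lru.rs` module above is self-contained, so its behaviour can be illustrated with a small usage sketch. The test below is not part of the diff; it only assumes the `LruMap` and `InsertionOutcome` items shown in the new file, with an example capacity of 2.

```rust
// Illustration only: a test one could write next to the new module.
#[test]
fn lru_map_evicts_least_recently_used() {
    let mut cache = LruMap::new(2);
    assert!(matches!(cache.insert("catto", 1), InsertionOutcome::InsertedNew));
    assert!(matches!(cache.insert("doggo", 2), InsertionOutcome::InsertedNew));
    // Reading "catto" bumps its generation, making it the most recently used entry.
    assert_eq!(cache.get(&"catto"), Some(&1));
    // The cache is full, so inserting a third key evicts the least recently used one.
    assert!(matches!(cache.insert("birdo", 3), InsertionOutcome::Evicted("doggo", 2)));
}
```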
@@ -538,3 +538,37 @@ impl IndexScheduler {
         }
     }
 }
+
+pub fn dichotomic_search(start_point: usize, mut is_good: impl FnMut(usize) -> bool) -> usize {
+    let mut biggest_good = None;
+    let mut smallest_bad = None;
+    let mut current = start_point;
+    loop {
+        let is_good = is_good(current);
+
+        (biggest_good, smallest_bad, current) = match (biggest_good, smallest_bad, is_good) {
+            (None, None, false) => (None, Some(current), current / 2),
+            (None, None, true) => (Some(current), None, current * 2),
+            (None, Some(smallest_bad), true) => {
+                (Some(current), Some(smallest_bad), (current + smallest_bad) / 2)
+            }
+            (None, Some(_), false) => (None, Some(current), current / 2),
+            (Some(_), None, true) => (Some(current), None, current * 2),
+            (Some(biggest_good), None, false) => {
+                (Some(biggest_good), Some(current), (biggest_good + current) / 2)
+            }
+            (Some(_), Some(smallest_bad), true) => {
+                (Some(current), Some(smallest_bad), (smallest_bad + current) / 2)
+            }
+            (Some(biggest_good), Some(_), false) => {
+                (Some(biggest_good), Some(current), (biggest_good + current) / 2)
+            }
+        };
+        if current == 0 {
+            return current;
+        }
+        if smallest_bad.is_some() && biggest_good.is_some() && biggest_good >= Some(current) {
+            return current;
+        }
+    }
+}
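`dichotomic_search` probes up or down from `start_point` until it has both a known-good and a known-bad value, then bisects between them. A hedged illustration, assuming a hidden limit of 300 and a starting probe of 1024 (example values only, not from the diff):

```rust
#[test]
fn dichotomic_search_converges_on_the_limit() {
    // The limit and the starting point are arbitrary example values.
    let limit = 300;
    let found = dichotomic_search(1024, |candidate| candidate <= limit);
    // The result is a "good" value, i.e. one accepted by the predicate.
    assert!(found <= limit);
    assert!(found > 0);
}
```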
@@ -85,17 +85,13 @@ impl AuthController {
         uid: Uuid,
         search_rules: Option<SearchRules>,
     ) -> Result<AuthFilter> {
-        let mut filters = AuthFilter::default();
         let key = self.get_key(uid)?;

-        filters.search_rules = match search_rules {
-            Some(search_rules) => search_rules,
-            None => SearchRules::Set(key.indexes.into_iter().collect()),
-        };
+        let key_authorized_indexes = SearchRules::Set(key.indexes.into_iter().collect());

-        filters.allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
+        let allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;

-        Ok(filters)
+        Ok(AuthFilter { search_rules, key_authorized_indexes, allow_index_creation })
     }

     pub fn list_keys(&self) -> Result<Vec<Key>> {

@@ -160,13 +156,59 @@ impl AuthController {
     }
 }

 pub struct AuthFilter {
-    pub search_rules: SearchRules,
-    pub allow_index_creation: bool,
+    search_rules: Option<SearchRules>,
+    key_authorized_indexes: SearchRules,
+    allow_index_creation: bool,
 }

 impl Default for AuthFilter {
     fn default() -> Self {
-        Self { search_rules: SearchRules::default(), allow_index_creation: true }
+        Self {
+            search_rules: None,
+            key_authorized_indexes: SearchRules::default(),
+            allow_index_creation: true,
+        }
+    }
+}
+
+impl AuthFilter {
+    #[inline]
+    pub fn allow_index_creation(&self, index: &str) -> bool {
+        self.allow_index_creation && self.is_index_authorized(index)
+    }
+
+    pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
+        Self {
+            search_rules: None,
+            key_authorized_indexes: SearchRules::Set(allowed_indexes),
+            allow_index_creation: false,
+        }
+    }
+
+    pub fn all_indexes_authorized(&self) -> bool {
+        self.key_authorized_indexes.all_indexes_authorized()
+            && self
+                .search_rules
+                .as_ref()
+                .map(|search_rules| search_rules.all_indexes_authorized())
+                .unwrap_or(true)
+    }
+
+    pub fn is_index_authorized(&self, index: &str) -> bool {
+        self.key_authorized_indexes.is_index_authorized(index)
+            && self
+                .search_rules
+                .as_ref()
+                .map(|search_rules| search_rules.is_index_authorized(index))
+                .unwrap_or(true)
+    }
+
+    pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
+        if !self.is_index_authorized(index) {
+            return None;
+        }
+        let search_rules = self.search_rules.as_ref().unwrap_or(&self.key_authorized_indexes);
+        search_rules.get_index_search_rules(index)
     }
 }

@@ -185,7 +227,7 @@ impl Default for SearchRules {
 }

 impl SearchRules {
-    pub fn is_index_authorized(&self, index: &str) -> bool {
+    fn is_index_authorized(&self, index: &str) -> bool {
         match self {
             Self::Set(set) => {
                 set.contains("*")

@@ -200,7 +242,7 @@ impl SearchRules {
             }
         }

-    pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
+    fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
         match self {
             Self::Set(_) => {
                 if self.is_index_authorized(index) {

@@ -219,24 +261,10 @@ impl SearchRules {
             }
         }

-    /// Return the list of indexes such that `self.is_index_authorized(index) == true`,
-    /// or `None` if all indexes satisfy this condition.
-    pub fn authorized_indexes(&self) -> Option<Vec<IndexUidPattern>> {
+    fn all_indexes_authorized(&self) -> bool {
         match self {
-            SearchRules::Set(set) => {
-                if set.contains("*") {
-                    None
-                } else {
-                    Some(set.iter().cloned().collect())
-                }
-            }
-            SearchRules::Map(map) => {
-                if map.contains_key("*") {
-                    None
-                } else {
-                    Some(map.keys().cloned().collect())
-                }
-            }
+            SearchRules::Set(set) => set.contains("*"),
+            SearchRules::Map(map) => map.contains_key("*"),
         }
     }
 }
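The reworked `AuthFilter` now distinguishes the indexes authorized by the API key (`key_authorized_indexes`) from the optional tenant-token `search_rules`; an index is only authorized when both agree. A minimal sketch of the intended behaviour of `with_allowed_indexes` follows; it is not part of the diff and assumes it runs where the `meilisearch_auth` types are in scope:

```rust
#[test]
fn allow_list_filter_restricts_indexes() {
    // Illustration only: an AuthFilter built from an explicit allow-list.
    let filter = AuthFilter::with_allowed_indexes(
        vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
    );
    assert!(filter.is_index_authorized("doggo"));
    assert!(!filter.is_index_authorized("catto"));
    // Filters built this way never grant index creation.
    assert!(!filter.allow_index_creation("doggo"));
}
```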
@@ -11,8 +11,8 @@ use serde::{Deserialize, Serialize};
 #[serde(rename_all = "camelCase")]
 pub struct ResponseError {
     #[serde(skip)]
-    code: StatusCode,
-    message: String,
+    pub code: StatusCode,
+    pub message: String,
     #[serde(rename = "code")]
     error_code: String,
     #[serde(rename = "type")]

@@ -212,6 +212,7 @@ InvalidApiKeyName , InvalidRequest , BAD_REQUEST ;
 InvalidApiKeyOffset , InvalidRequest , BAD_REQUEST ;
 InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
 InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
+InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentId , InvalidRequest , BAD_REQUEST ;

@@ -220,7 +221,6 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
 InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
 InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
 InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
-InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
 InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@@ -46,7 +46,7 @@ pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
 pub enum VersionFileError {
     #[error(
         "Meilisearch (v{}) failed to infer the version of the database.
-To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
+To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
         env!("CARGO_PKG_VERSION").to_string()
     )]
     MissingVersionFile,

@@ -54,7 +54,7 @@ pub enum VersionFileError {
     MalformedVersionFile,
     #[error(
         "Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
-To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
+To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
         env!("CARGO_PKG_VERSION").to_string()
     )]
     VersionMismatch { major: String, minor: String, patch: String },
@@ -52,7 +52,7 @@ parking_lot = "0.12.1"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.9"
 platform-dirs = "0.3.0"
-prometheus = { version = "0.13.2", features = ["process"], optional = true }
+prometheus = { version = "0.13.2", features = ["process"] }
 rand = "0.8.5"
 rayon = "1.5.3"
 regex = "1.6.0"

@@ -107,7 +107,6 @@ zip = { version = "0.6.2", optional = true }

 [features]
 default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
-metrics = ["prometheus"]
 analytics = ["segment"]
 mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
 chinese = ["meilisearch-types/chinese"]
@@ -26,6 +26,18 @@ impl SearchAggregator {
     pub fn succeed(&mut self, _: &dyn Any) {}
 }

+#[derive(Default)]
+pub struct MultiSearchAggregator;
+
+#[allow(dead_code)]
+impl MultiSearchAggregator {
+    pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
+        Self::default()
+    }
+
+    pub fn succeed(&mut self) {}
+}
+
 impl MockAnalytics {
     #[allow(clippy::new_ret_no_self)]
     pub fn new(opt: &Opt) -> Arc<dyn Analytics> {

@@ -43,6 +55,7 @@ impl Analytics for MockAnalytics {
     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
     fn get_search(&self, _aggregate: super::SearchAggregator) {}
     fn post_search(&self, _aggregate: super::SearchAggregator) {}
+    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
     fn add_documents(
         &self,
         _documents_query: &UpdateDocumentsQuery,
@@ -23,6 +23,8 @@ use crate::routes::tasks::TasksFilterQuery;
 pub type SegmentAnalytics = mock_analytics::MockAnalytics;
 #[cfg(any(debug_assertions, not(feature = "analytics")))]
 pub type SearchAggregator = mock_analytics::SearchAggregator;
+#[cfg(any(debug_assertions, not(feature = "analytics")))]
+pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;

 // if we are in release mode and the feature analytics was enabled
 // we use the real analytics

@@ -30,6 +32,8 @@ pub type SearchAggregator = mock_analytics::SearchAggregator;
 pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
 pub type SearchAggregator = segment_analytics::SearchAggregator;
+#[cfg(all(not(debug_assertions), feature = "analytics"))]
+pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;

 /// The Meilisearch config dir:
 /// `~/.config/Meilisearch` on *NIX or *BSD.

@@ -74,6 +78,9 @@ pub trait Analytics: Sync + Send {
     /// This method should be called to aggregate a post search
     fn post_search(&self, aggregate: SearchAggregator);

+    /// This method should be called to aggregate a post array of searches
+    fn post_multi_search(&self, aggregate: MultiSearchAggregator);
+
     // this method should be called to aggregate a add documents request
     fn add_documents(
         &self,
@@ -9,7 +9,7 @@ use actix_web::HttpRequest;
 use byte_unit::Byte;
 use http::header::CONTENT_TYPE;
 use index_scheduler::IndexScheduler;
-use meilisearch_auth::{AuthController, SearchRules};
+use meilisearch_auth::{AuthController, AuthFilter};
 use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use regex::Regex;

@@ -30,7 +30,7 @@ use crate::routes::indexes::documents::UpdateDocumentsQuery;
 use crate::routes::tasks::TasksFilterQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
-    SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
 };
 use crate::Opt;

@@ -68,6 +68,7 @@ pub enum AnalyticsMsg {
     BatchMessage(Track),
     AggregateGetSearch(SearchAggregator),
     AggregatePostSearch(SearchAggregator),
+    AggregatePostMultiSearch(MultiSearchAggregator),
     AggregateAddDocuments(DocumentsAggregator),
     AggregateDeleteDocuments(DocumentsDeletionAggregator),
     AggregateUpdateDocuments(DocumentsAggregator),

@@ -133,6 +134,7 @@ impl SegmentAnalytics {
             opt: opt.clone(),
             batcher,
             post_search_aggregator: SearchAggregator::default(),
+            post_multi_search_aggregator: MultiSearchAggregator::default(),
             get_search_aggregator: SearchAggregator::default(),
             add_documents_aggregator: DocumentsAggregator::default(),
             delete_documents_aggregator: DocumentsDeletionAggregator::default(),

@@ -174,6 +176,10 @@ impl super::Analytics for SegmentAnalytics {
         let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
     }

+    fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
+        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
+    }
+
     fn add_documents(
         &self,
         documents_query: &UpdateDocumentsQuery,

@@ -218,6 +224,7 @@ impl super::Analytics for SegmentAnalytics {
 #[derive(Debug, Clone, Serialize)]
 struct Infos {
     env: String,
+    experimental_enable_metrics: bool,
     db_path: bool,
     import_dump: bool,
     dump_dir: bool,

@@ -250,6 +257,7 @@ impl From<Opt> for Infos {
         // Thus we must not insert `..` at the end.
         let Opt {
             db_path,
+            experimental_enable_metrics,
             http_addr,
             master_key: _,
             env,

@@ -284,12 +292,14 @@ impl From<Opt> for Infos {
             ScheduleSnapshot::Enabled(interval) => Some(interval),
         };

-        let IndexerOpts { max_indexing_memory, max_indexing_threads } = indexer_options;
+        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
+            indexer_options;

         // We're going to override every sensible information.
         // We consider information sensible if it contains a path, an address, or a key.
         Self {
             env,
+            experimental_enable_metrics,
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),
             dump_dir: dump_dir != PathBuf::from("dumps/"),

@@ -324,6 +334,7 @@ pub struct Segment {
     batcher: AutoBatcher,
     get_search_aggregator: SearchAggregator,
     post_search_aggregator: SearchAggregator,
+    post_multi_search_aggregator: MultiSearchAggregator,
     add_documents_aggregator: DocumentsAggregator,
     delete_documents_aggregator: DocumentsDeletionAggregator,
     update_documents_aggregator: DocumentsAggregator,

@@ -381,6 +392,7 @@ impl Segment {
                 Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
                 Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
+                Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),

@@ -399,7 +411,7 @@ impl Segment {
         auth_controller: AuthController,
     ) {
         if let Ok(stats) =
-            create_all_stats(index_scheduler.into(), auth_controller, &SearchRules::default())
+            create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
         {
             // Replace the version number with the prototype name if any.
             let version = if let Some(prototype) = crate::prototype_name() {

@@ -426,6 +438,8 @@ impl Segment {
             .into_event(&self.user, "Documents Searched GET");
         let post_search = std::mem::take(&mut self.post_search_aggregator)
             .into_event(&self.user, "Documents Searched POST");
+        let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
+            .into_event(&self.user, "Documents Searched by Multi-Search POST");
         let add_documents = std::mem::take(&mut self.add_documents_aggregator)
             .into_event(&self.user, "Documents Added");
         let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)

@@ -443,6 +457,9 @@ impl Segment {
         if let Some(post_search) = post_search {
             let _ = self.batcher.push(post_search).await;
         }
+        if let Some(post_multi_search) = post_multi_search {
+            let _ = self.batcher.push(post_multi_search).await;
+        }
         if let Some(add_documents) = add_documents {
             let _ = self.batcher.push(add_documents).await;
         }

@@ -483,6 +500,7 @@ pub struct SearchAggregator {

     // filter
     filter_with_geo_radius: bool,
+    filter_with_geo_bounding_box: bool,
     // every time a request has a filter, this field must be incremented by the number of terms it contains
     filter_sum_of_criteria_terms: usize,
     // every time a request has a filter, this field must be incremented by one

@@ -550,6 +568,7 @@ impl SearchAggregator {

             let stringified_filters = filter.to_string();
             ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
+            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
             ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
         }

@@ -609,6 +628,7 @@ impl SearchAggregator {

         // filter
         self.filter_with_geo_radius |= other.filter_with_geo_radius;
+        self.filter_with_geo_bounding_box |= other.filter_with_geo_bounding_box;
         self.filter_sum_of_criteria_terms =
             self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms);
         self.filter_total_number_of_criteria = self

@@ -676,6 +696,7 @@ impl SearchAggregator {
                 },
                 "filter": {
                     "with_geoRadius": self.filter_with_geo_radius,
+                    "with_geoBoundingBox": self.filter_with_geo_bounding_box,
                     "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64),
                     "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                 },

@@ -716,6 +737,118 @@ impl SearchAggregator {
     }
 }

+#[derive(Default)]
+pub struct MultiSearchAggregator {
+    timestamp: Option<OffsetDateTime>,
+
+    // requests
+    total_received: usize,
+    total_succeeded: usize,
+
+    // sum of the number of distinct indexes in each single request, use with total_received to compute an avg
+    total_distinct_index_count: usize,
+    // number of queries with a single index, use with total_received to compute a proportion
+    total_single_index: usize,
+
+    // sum of the number of search queries in the requests, use with total_received to compute an average
+    total_search_count: usize,
+
+    // context
+    user_agents: HashSet<String>,
+}
+
+impl MultiSearchAggregator {
+    pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
+        let timestamp = Some(OffsetDateTime::now_utc());
+
+        let user_agents = extract_user_agents(request).into_iter().collect();
+
+        let distinct_indexes: HashSet<_> =
+            query.iter().map(|query| query.index_uid.as_str()).collect();
+
+        Self {
+            timestamp,
+            total_received: 1,
+            total_succeeded: 0,
+            total_distinct_index_count: distinct_indexes.len(),
+            total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
+            total_search_count: query.len(),
+            user_agents,
+        }
+    }
+
+    pub fn succeed(&mut self) {
+        self.total_succeeded = self.total_succeeded.saturating_add(1);
+    }
+
+    pub fn aggregate(&mut self, other: Self) {
+        // write the aggregate in a way that will cause a compilation error if a field is added.
+
+        // get ownership of self, replacing it by a default value.
+        let this = std::mem::take(self);
+
+        let timestamp = this.timestamp.or(other.timestamp);
+        let total_received = this.total_received.saturating_add(other.total_received);
+        let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded);
+        let total_distinct_index_count =
+            this.total_distinct_index_count.saturating_add(other.total_distinct_index_count);
+        let total_single_index = this.total_single_index.saturating_add(other.total_single_index);
+        let total_search_count = this.total_search_count.saturating_add(other.total_search_count);
+        let mut user_agents = this.user_agents;
+
+        for user_agent in other.user_agents.into_iter() {
+            user_agents.insert(user_agent);
+        }
+
+        // need all fields or compile error
+        let mut aggregated = Self {
+            timestamp,
+            total_received,
+            total_succeeded,
+            total_distinct_index_count,
+            total_single_index,
+            total_search_count,
+            user_agents,
+            // do not add _ or ..Default::default() here
+        };
+
+        // replace the default self with the aggregated value
+        std::mem::swap(self, &mut aggregated);
+    }
+
+    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
+        if self.total_received == 0 {
+            None
+        } else {
+            let properties = json!({
+                "user-agent": self.user_agents,
+                "requests": {
+                    "total_succeeded": self.total_succeeded,
+                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
+                    "total_received": self.total_received,
+                },
+                "indexes": {
+                    "total_single_index": self.total_single_index,
+                    "total_distinct_index_count": self.total_distinct_index_count,
+                    "avg_distinct_index_count": (self.total_distinct_index_count as f64) / (self.total_received as f64), // not 0 else returned early
+                },
+                "searches": {
+                    "total_search_count": self.total_search_count,
+                    "avg_search_count": (self.total_search_count as f64) / (self.total_received as f64),
+                }
+            });
+
+            Some(Track {
+                timestamp: self.timestamp,
+                user: user.clone(),
+                event: event_name.to_string(),
+                properties,
+                ..Default::default()
+            })
+        }
+    }
+}
+
 #[derive(Default)]
 pub struct DocumentsAggregator {
     timestamp: Option<OffsetDateTime>,
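The averages reported by `MultiSearchAggregator::into_event` are plain ratios over the number of aggregated requests. A small worked sketch with example numbers only: two multi-search requests, one with 3 queries on a single index and one with 2 queries on two distinct indexes, aggregate to `total_received = 2`, `total_search_count = 5` and `total_distinct_index_count = 3`.

```rust
#[test]
fn multi_search_telemetry_averages() {
    // Example numbers only; they mirror the divisions done in into_event above.
    let (total_received, total_search_count, total_distinct_index_count) = (2usize, 5usize, 3usize);
    assert_eq!(total_search_count as f64 / total_received as f64, 2.5);
    assert_eq!(total_distinct_index_count as f64 / total_received as f64, 1.5);
}
```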
@@ -136,6 +136,13 @@ pub mod policies {

     use crate::extractors::authentication::Policy;

+    enum TenantTokenOutcome {
+        NotATenantToken,
+        Invalid,
+        Expired,
+        Valid(Uuid, SearchRules),
+    }
+
     fn tenant_token_validation() -> Validation {
         let mut validation = Validation::default();
         validation.validate_exp = false;

@@ -164,29 +171,42 @@ pub mod policies {
     pub struct ActionPolicy<const A: u8>;

     impl<const A: u8> Policy for ActionPolicy<A> {
+        /// Attempts to grant authentication from a bearer token (that can be a tenant token or an API key), the requested Action,
+        /// and a list of requested indexes.
+        ///
+        /// If the bearer token is not allowed for the specified indexes and action, returns `None`.
+        /// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
+        /// (that may contain more indexes than requested).
         fn authenticate(
             auth: AuthController,
             token: &str,
             index: Option<&str>,
         ) -> Option<AuthFilter> {
             // authenticate if token is the master key.
-            // master key can only have access to keys routes.
-            // if master key is None only keys routes are inaccessible.
+            // Without a master key, all routes are accessible except the key-related routes.
             if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
                 return Some(AuthFilter::default());
             }

-            // Tenant token
-            if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
-            {
-                return Some(filters);
-            } else if let Some(action) = Action::from_repr(A) {
-                // API key
-                if let Ok(Some(uid)) = auth.get_optional_uid_from_encoded_key(token.as_bytes()) {
-                    if let Ok(true) = auth.is_key_authorized(uid, action, index) {
-                        return auth.get_key_filters(uid, None).ok();
-                    }
-                }
-            }
+            let (key_uuid, search_rules) =
+                match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
+                    TenantTokenOutcome::Valid(key_uuid, search_rules) => {
+                        (key_uuid, Some(search_rules))
+                    }
+                    TenantTokenOutcome::Expired => return None,
+                    TenantTokenOutcome::Invalid => return None,
+                    TenantTokenOutcome::NotATenantToken => {
+                        (auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
+                    }
+                };
+
+            // check that the indexes are allowed
+            let action = Action::from_repr(A)?;
+            let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
+            if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
+                && index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
+            {
+                return Some(auth_filter);
+            }

             None

@@ -194,50 +214,43 @@ pub mod policies {
     }

     impl<const A: u8> ActionPolicy<A> {
-        fn authenticate_tenant_token(
-            auth: &AuthController,
-            token: &str,
-            index: Option<&str>,
-        ) -> Option<AuthFilter> {
-            // A tenant token only has access to the search route which always defines an index.
-            let index = index?;
-
+        fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
             // Only search action can be accessed by a tenant token.
             if A != actions::SEARCH {
-                return None;
+                return TenantTokenOutcome::NotATenantToken;
             }

-            let uid = extract_key_id(token)?;
-            // check if parent key is authorized to do the action.
-            if auth.is_key_authorized(uid, Action::Search, Some(index)).ok()? {
-                // Check if tenant token is valid.
-                let key = auth.generate_key(uid)?;
-                let data = decode::<Claims>(
-                    token,
-                    &DecodingKey::from_secret(key.as_bytes()),
-                    &tenant_token_validation(),
-                )
-                .ok()?;
-
-                // Check index access if an index restriction is provided.
-                if !data.claims.search_rules.is_index_authorized(index) {
-                    return None;
-                }
-
-                // Check if token is expired.
-                if let Some(exp) = data.claims.exp {
-                    if OffsetDateTime::now_utc().unix_timestamp() > exp {
-                        return None;
-                    }
-                }
-
-                return match auth.get_key_filters(uid, Some(data.claims.search_rules)) {
-                    Ok(auth) if auth.search_rules.is_index_authorized(index) => Some(auth),
-                    _ => None,
-                };
-            }
-
-            None
+            let uid = if let Some(uid) = extract_key_id(token) {
+                uid
+            } else {
+                return TenantTokenOutcome::NotATenantToken;
+            };
+
+            // Check if tenant token is valid.
+            let key = if let Some(key) = auth.generate_key(uid) {
+                key
+            } else {
+                return TenantTokenOutcome::Invalid;
+            };
+
+            let data = if let Ok(data) = decode::<Claims>(
+                token,
+                &DecodingKey::from_secret(key.as_bytes()),
+                &tenant_token_validation(),
+            ) {
+                data
+            } else {
+                return TenantTokenOutcome::Invalid;
+            };
+
+            // Check if token is expired.
+            if let Some(exp) = data.claims.exp {
+                if OffsetDateTime::now_utc().unix_timestamp() > exp {
+                    return TenantTokenOutcome::Expired;
+                }
+            }
+
+            TenantTokenOutcome::Valid(uid, data.claims.search_rules)
         }
     }
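The new `TenantTokenOutcome` enum makes the possible results of tenant-token parsing explicit, and `authenticate` maps each variant to a decision. A hedged sketch of that mapping (the enum is private to the `policies` module, so this is illustration only, not code from the diff):

```rust
// Illustration only: how each outcome is consumed by `authenticate` above.
fn describe(outcome: TenantTokenOutcome) -> &'static str {
    match outcome {
        // Not a tenant token: fall back to plain API-key authentication.
        TenantTokenOutcome::NotATenantToken => "try the token as an API key",
        // Bad signature or expired claims: reject the request outright.
        TenantTokenOutcome::Invalid | TenantTokenOutcome::Expired => "reject the request",
        // Valid: keep the embedded search rules to further restrict the key.
        TenantTokenOutcome::Valid(_, _) => "apply the tenant token search rules",
    }
}
```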
@ -4,15 +4,12 @@ pub mod error;
|
|||||||
pub mod analytics;
|
pub mod analytics;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod extractors;
|
pub mod extractors;
|
||||||
|
pub mod metrics;
|
||||||
|
pub mod middleware;
|
||||||
pub mod option;
|
pub mod option;
|
||||||
pub mod routes;
|
pub mod routes;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
|
|
||||||
#[cfg(feature = "metrics")]
|
|
||||||
pub mod metrics;
|
|
||||||
#[cfg(feature = "metrics")]
|
|
||||||
pub mod route_metrics;
|
|
||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufReader, BufWriter};
|
use std::io::{BufReader, BufWriter};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@ -25,7 +22,7 @@ use actix_http::body::MessageBody;
|
|||||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||||
use actix_web::error::JsonPayloadError;
|
use actix_web::error::JsonPayloadError;
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{middleware, web, HttpRequest};
|
use actix_web::{web, HttpRequest};
|
||||||
use analytics::Analytics;
|
use analytics::Analytics;
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use error::PayloadError;
|
use error::PayloadError;
|
||||||
@ -45,6 +42,34 @@ use option::ScheduleSnapshot;
|
|||||||
|
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
|
|
||||||
|
/// Default number of simultaneously opened indexes.
|
||||||
|
///
|
||||||
|
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
|
||||||
|
///
|
||||||
|
/// Lower for Windows that dedicates a smaller virtual address space to processes.
|
||||||
|
///
|
||||||
|
/// The value was chosen this way:
|
||||||
|
///
|
||||||
|
/// - Windows provides a small virtual address space of about 10TiB to processes.
|
||||||
|
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
|
||||||
|
#[cfg(windows)]
|
||||||
|
const DEFAULT_INDEX_COUNT: usize = 4;
|
||||||
|
|
||||||
|
/// Default number of simultaneously opened indexes.
|
||||||
|
///
|
||||||
|
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
|
||||||
|
///
|
||||||
|
/// The higher, the better for avoiding reopening indexes.
|
||||||
|
///
|
||||||
|
/// The value was chosen this way:
|
||||||
|
///
|
||||||
|
/// - Opening an index consumes a file descriptor.
|
||||||
|
/// - The default on many unices is about 256 file descriptors for a process.
|
||||||
|
/// - 100 is a little bit less than half this value.
|
||||||
|
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
|
||||||
|
#[cfg(not(windows))]
|
||||||
|
const DEFAULT_INDEX_COUNT: usize = 20;
|
||||||
|
|
||||||
/// Check if a db is empty. It does not provide any information on the
|
/// Check if a db is empty. It does not provide any information on the
|
||||||
/// validity of the data in it.
|
/// validity of the data in it.
|
||||||
/// We consider a database as non empty when it's a non empty directory.
|
/// We consider a database as non empty when it's a non empty directory.
|
||||||
@@ -86,13 +111,13 @@ pub fn create_app(
             analytics.clone(),
         )
     })
-    .configure(routes::configure)
+    .configure(|cfg| routes::configure(cfg, opt.experimental_enable_metrics))
     .configure(|s| dashboard(s, enable_dashboard));
-    #[cfg(feature = "metrics")]
-    let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));

-    #[cfg(feature = "metrics")]
-    let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics));
+    let app = app.wrap(actix_web::middleware::Condition::new(
+        opt.experimental_enable_metrics,
+        middleware::RouteMetrics,
+    ));
     app.wrap(
         Cors::default()
             .send_wildcard()
@@ -101,9 +126,9 @@ pub fn create_app(
             .allow_any_method()
             .max_age(86_400), // 24h
     )
-    .wrap(middleware::Logger::default())
-    .wrap(middleware::Compress::default())
-    .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
+    .wrap(actix_web::middleware::Logger::default())
+    .wrap(actix_web::middleware::Compress::default())
+    .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
 }

 enum OnFailure {
@@ -205,9 +230,11 @@ fn open_or_create_database_unchecked(
         snapshots_path: opt.snapshot_dir.clone(),
         dumps_path: opt.dump_dir.clone(),
         task_db_size: opt.max_task_db_size.get_bytes() as usize,
-        index_size: opt.max_index_size.get_bytes() as usize,
+        index_base_map_size: opt.max_index_size.get_bytes() as usize,
         indexer_config: (&opt.indexer_options).try_into()?,
         autobatching_enabled: true,
+        index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
+        index_count: DEFAULT_INDEX_COUNT,
     })?)
 };

@@ -419,15 +446,6 @@ pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
     config.service(web::resource("/").route(web::get().to(routes::running)));
 }

-#[cfg(feature = "metrics")]
-pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_route: bool) {
-    if enable_metrics_route {
-        config.service(
-            web::resource("/metrics").route(web::get().to(crate::route_metrics::get_metrics)),
-        );
-    }
-}
-
 /// Parses the output of
 /// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
 /// as a prototype name.
@@ -1,40 +1,11 @@
+//! Contains all the custom middleware used in meilisearch
+
 use std::future::{ready, Ready};

 use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
-use actix_web::http::header;
-use actix_web::{Error, HttpResponse};
+use actix_web::Error;
 use futures_util::future::LocalBoxFuture;
-use meilisearch_auth::actions;
-use meilisearch_lib::MeiliSearch;
-use meilisearch_types::error::ResponseError;
-use prometheus::{Encoder, HistogramTimer, TextEncoder};
+use prometheus::HistogramTimer;
-
-use crate::extractors::authentication::policies::ActionPolicy;
-use crate::extractors::authentication::GuardedData;
-
-pub async fn get_metrics(
-    meilisearch: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, MeiliSearch>,
-) -> Result<HttpResponse, ResponseError> {
-    let search_rules = &meilisearch.filters().search_rules;
-    let response = meilisearch.get_all_stats(search_rules).await?;
-
-    crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
-    crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
-
-    for (index, value) in response.indexes.iter() {
-        crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
-            .with_label_values(&[index])
-            .set(value.number_of_documents as i64);
-    }
-
-    let encoder = TextEncoder::new();
-    let mut buffer = vec![];
-    encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
-
-    let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
-
-    Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
-}

 pub struct RouteMetrics;
@@ -47,8 +47,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
 const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
 const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
 const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
-#[cfg(feature = "metrics")]
-const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
+const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";

 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -65,11 +64,11 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
 const DEFAULT_LOG_EVERY_N: usize = 100_000;

 // Each environment (index and task-db) is taking space in the virtual address space.
-//
-// The size of the virtual address space is limited by the OS. About 100TB for Linux and about 10TB for Windows.
-// This means that the number of indexes is limited to about 200 for Linux and about 20 for Windows.
-pub const INDEX_SIZE: u64 = 536_870_912_000; // 500 GiB
-pub const TASK_DB_SIZE: u64 = 10_737_418_240; // 10 GiB
+// Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
+// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
+// opened simultaneously.
+pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
+pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB

 #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
 #[serde(rename_all = "UPPERCASE")]
@@ -287,11 +286,12 @@ pub struct Opt {
     #[serde(default)]
     pub log_level: LogLevel,

-    /// Enables Prometheus metrics and /metrics route.
-    #[cfg(feature = "metrics")]
-    #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
+    /// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
+    ///
+    /// Enables the Prometheus metrics on the `GET /metrics` endpoint.
+    #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_METRICS)]
     #[serde(default)]
-    pub enable_metrics_route: bool,
+    pub experimental_enable_metrics: bool,

     #[serde(flatten)]
     #[clap(flatten)]
@@ -384,8 +384,7 @@ impl Opt {
             config_file_path: _,
             #[cfg(all(not(debug_assertions), feature = "analytics"))]
             no_analytics,
-            #[cfg(feature = "metrics")]
-            enable_metrics_route,
+            experimental_enable_metrics: enable_metrics_route,
         } = self;
         export_to_env_if_not_present(MEILI_DB_PATH, db_path);
         export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -423,13 +422,10 @@ impl Opt {

         export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
         export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
-        #[cfg(feature = "metrics")]
-        {
-            export_to_env_if_not_present(
-                MEILI_ENABLE_METRICS_ROUTE,
-                enable_metrics_route.to_string(),
-            );
-        }
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_ENABLE_METRICS,
+            enable_metrics_route.to_string(),
+        );
         indexer_options.export_to_env();
     }
@@ -494,12 +490,21 @@ pub struct IndexerOpts {
     #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
     #[serde(default)]
     pub max_indexing_threads: MaxThreads,

+    /// Whether or not we want to determine the budget of virtual memory address space we have available dynamically
+    /// (the default), or statically.
+    ///
+    /// Determining the budget of virtual memory address space dynamically takes some time on some systems (such as macOS)
+    /// and may make tests non-deterministic, so we want to skip it in tests.
+    #[clap(skip)]
+    #[serde(skip)]
+    pub skip_index_budget: bool,
 }

 impl IndexerOpts {
     /// Exports the values to their corresponding env vars if they are not set.
     pub fn export_to_env(self) {
-        let IndexerOpts { max_indexing_memory, max_indexing_threads } = self;
+        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
                 MEILI_MAX_INDEXING_MEMORY,
@@ -527,6 +532,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
             thread_pool: Some(thread_pool),
             max_positions_per_attributes: None,
+            skip_index_budget: other.skip_index_budget,
             ..Default::default()
         })
     }
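Because `skip_index_budget` is marked `#[clap(skip)]` and `#[serde(skip)]`, it can only be set from code. The test helper later in this comparison sets it exactly this way; a short sketch of the same pattern, assuming `clap::Parser` is in scope as it is in the test suite:

use clap::Parser;

// Hypothetical helper mirroring the test-suite pattern: parse defaults, then
// force the budget probe off so tests stay fast and deterministic.
fn test_indexer_opts() -> IndexerOpts {
    IndexerOpts { skip_index_budget: true, ..Parser::parse_from(None as Option<&str>) }
}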
@@ -160,19 +160,19 @@ pub async fn get_all_documents(
 pub struct UpdateDocumentsQuery {
     #[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
     pub primary_key: Option<String>,
-    #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidIndexCsvDelimiter>, error = DeserrQueryParamError<InvalidIndexCsvDelimiter>)]
+    #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
     pub csv_delimiter: Option<u8>,
 }

 fn from_char_csv_delimiter(
     c: char,
-) -> Result<Option<u8>, DeserrQueryParamError<InvalidIndexCsvDelimiter>> {
+) -> Result<Option<u8>, DeserrQueryParamError<InvalidDocumentCsvDelimiter>> {
     if c.is_ascii() {
         Ok(Some(c as u8))
     } else {
         Err(DeserrQueryParamError::new(
             format!("csv delimiter must be an ascii character. Found: `{}`", c),
-            Code::InvalidIndexCsvDelimiter,
+            Code::InvalidDocumentCsvDelimiter,
         ))
     }
 }
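The delimiter check above only accepts single ASCII characters, which fit in one byte. A standalone restatement of that rule, detached from the deserr error types used in the route:

/// Standalone restatement of the rule enforced by `from_char_csv_delimiter`:
/// only single ASCII characters are accepted as a CSV delimiter.
fn is_valid_csv_delimiter(c: char) -> Option<u8> {
    c.is_ascii().then(|| c as u8)
}

#[test]
fn csv_delimiter_must_be_ascii() {
    assert_eq!(is_valid_csv_delimiter(';'), Some(b';'));
    assert_eq!(is_valid_csv_delimiter('🍰'), None);
}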
@@ -192,7 +192,7 @@ pub async fn replace_documents(

     analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
     let task = document_addition(
         extract_mime_type(&req)?,
         index_scheduler,
@@ -223,7 +223,7 @@ pub async fn update_documents(

     analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
     let task = document_addition(
         extract_mime_type(&req)?,
         index_scheduler,
@@ -61,6 +61,8 @@ pub struct IndexView {

 impl IndexView {
     fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
+        // It is important that this function does not keep the Index handle or a clone of it, because
+        // `list_indexes` relies on this property to avoid opening all indexes at once.
         let rtxn = index.read_txn()?;
         Ok(IndexView {
             uid,
@@ -89,14 +91,16 @@ pub async fn list_indexes(
     index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
     paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
 ) -> Result<HttpResponse, ResponseError> {
-    let search_rules = &index_scheduler.filters().search_rules;
-    let indexes: Vec<_> = index_scheduler.indexes()?;
-    let indexes = indexes
-        .into_iter()
-        .filter(|(name, _)| search_rules.is_index_authorized(name))
-        .map(|(name, index)| IndexView::new(name, &index))
-        .collect::<Result<Vec<_>, _>>()?;
+    let filters = index_scheduler.filters();
+    let indexes: Vec<Option<IndexView>> =
+        index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
+            if !filters.is_index_authorized(uid) {
+                return Ok(None);
+            }
+            Ok(Some(IndexView::new(uid.to_string(), index)?))
+        })?;
+    // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
+    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
     let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());

     debug!("returns: {:?}", ret);
@@ -120,7 +124,7 @@ pub async fn create_index(
 ) -> Result<HttpResponse, ResponseError> {
     let IndexCreateRequest { primary_key, uid } = body.into_inner();

-    let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid);
+    let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
     if allow_index_creation {
         analytics.publish(
             "Index Created".to_string(),
@@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use index_scheduler::IndexScheduler;
 use log::debug;
-use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
@@ -17,9 +16,9 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -101,26 +100,6 @@ impl From<SearchQueryGet> for SearchQuery {
     }
 }

-/// Incorporate search rules in search query
-fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
-    query.filter = match (query.filter.take(), rules.filter) {
-        (None, rules_filter) => rules_filter,
-        (filter, None) => filter,
-        (Some(filter), Some(rules_filter)) => {
-            let filter = match filter {
-                Value::Array(filter) => filter,
-                filter => vec![filter],
-            };
-            let rules_filter = match rules_filter {
-                Value::Array(rules_filter) => rules_filter,
-                rules_filter => vec![rules_filter],
-            };
-
-            Some(Value::Array([filter, rules_filter].concat()))
-        }
-    }
-}
-
 // TODO: TAMO: split on :asc, and :desc, instead of doing some weird things

 /// Transform the sort query parameter into something that matches the post expected format.
@@ -159,9 +138,7 @@ pub async fn search_with_url_query(
     let mut query: SearchQuery = params.into_inner().into();

     // Tenant token search_rules.
-    if let Some(search_rules) =
-        index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
-    {
+    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query, search_rules);
     }

@@ -193,9 +170,7 @@ pub async fn search_with_post(
     debug!("search called with params: {:?}", query);

     // Tenant token search_rules.
-    if let Some(search_rules) =
-        index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
-    {
+    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query, search_rules);
     }
@@ -45,7 +45,8 @@ macro_rules! make_setting_route {

     let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation =
+        index_scheduler.filters().allow_index_creation(&index_uid);

     let task = KindWithContent::SettingsUpdate {
         index_uid: index_uid.to_string(),
@@ -86,7 +87,8 @@ macro_rules! make_setting_route {
         ..Default::default()
     };

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation =
+        index_scheduler.filters().allow_index_creation(&index_uid);

     let task = KindWithContent::SettingsUpdate {
         index_uid: index_uid.to_string(),
@@ -560,7 +562,7 @@ pub async fn update_all(
         Some(&req),
     );

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
     let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
     let task = KindWithContent::SettingsUpdate {
         index_uid,
@@ -596,7 +598,7 @@ pub async fn delete_all(

     let new_settings = Settings::cleared().into_unchecked();

-    let allow_index_creation = index_scheduler.filters().allow_index_creation;
+    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
     let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
     let task = KindWithContent::SettingsUpdate {
         index_uid,
meilisearch/src/routes/metrics.rs (new file, 50 lines)
@@ -0,0 +1,50 @@
+use actix_web::http::header;
+use actix_web::web::{self, Data};
+use actix_web::HttpResponse;
+use index_scheduler::IndexScheduler;
+use meilisearch_auth::AuthController;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::actions;
+use prometheus::{Encoder, TextEncoder};
+
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::{AuthenticationError, GuardedData};
+use crate::routes::create_all_stats;
+
+pub fn configure(config: &mut web::ServiceConfig) {
+    config.service(web::resource("").route(web::get().to(get_metrics)));
+}
+
+pub async fn get_metrics(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
+) -> Result<HttpResponse, ResponseError> {
+    let auth_filters = index_scheduler.filters();
+    if !auth_filters.all_indexes_authorized() {
+        let mut error = ResponseError::from(AuthenticationError::InvalidToken);
+        error
+            .message
+            .push_str(" The API key for the `/metrics` route must allow access to all indexes.");
+        return Err(error);
+    }
+
+    let response =
+        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
+
+    crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
+    crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
+
+    for (index, value) in response.indexes.iter() {
+        crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
+            .with_label_values(&[index])
+            .set(value.number_of_documents as i64);
+    }
+
+    let encoder = TextEncoder::new();
+    let mut buffer = vec![];
+    encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
+
+    let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
+
+    Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
+}
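The handler updates gauges defined in `crate::metrics`, which this comparison does not show. A hedged sketch of how such gauges are typically declared with the `prometheus` crate; the gauge identifiers mirror the ones referenced above, but the metric names, help strings, and exact definitions in the repository may differ:

use lazy_static::lazy_static;
use prometheus::{register_int_gauge, register_int_gauge_vec, IntGauge, IntGaugeVec};

lazy_static! {
    // Illustrative declarations; `get_metrics` above only assumes these statics exist.
    pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
        register_int_gauge!("meilisearch_db_size_bytes", "Meilisearch database size in bytes")
            .expect("Can't create a metric");
    pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
        register_int_gauge!("meilisearch_index_count", "Meilisearch number of indexes")
            .expect("Can't create a metric");
    pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!(
        "meilisearch_index_docs_count",
        "Meilisearch number of documents per index",
        &["index"]
    )
    .expect("Can't create a metric");
}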
@@ -22,10 +22,12 @@ const PAGINATION_DEFAULT_LIMIT: usize = 20;
 mod api_key;
 mod dump;
 pub mod indexes;
+mod metrics;
+mod multi_search;
 mod swap_indexes;
 pub mod tasks;

-pub fn configure(cfg: &mut web::ServiceConfig) {
+pub fn configure(cfg: &mut web::ServiceConfig, enable_metrics: bool) {
     cfg.service(web::scope("/tasks").configure(tasks::configure))
         .service(web::resource("/health").route(web::get().to(get_health)))
         .service(web::scope("/keys").configure(api_key::configure))
@@ -33,7 +35,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         .service(web::resource("/stats").route(web::get().to(get_stats)))
         .service(web::resource("/version").route(web::get().to(get_version)))
         .service(web::scope("/indexes").configure(indexes::configure))
+        .service(web::scope("/multi-search").configure(multi_search::configure))
         .service(web::scope("/swap-indexes").configure(swap_indexes::configure));
+
+    if enable_metrics {
+        cfg.service(web::scope("/metrics").configure(metrics::configure));
+    }
 }

 #[derive(Debug, Serialize)]
@@ -237,10 +244,9 @@ async fn get_stats(
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
-    let search_rules = &index_scheduler.filters().search_rules;
+    let filters = index_scheduler.filters();

-    let stats =
-        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), search_rules)?;
+    let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;

     debug!("returns: {:?}", stats);
     Ok(HttpResponse::Ok().json(stats))
@@ -249,20 +255,20 @@ async fn get_stats(
 pub fn create_all_stats(
     index_scheduler: Data<IndexScheduler>,
     auth_controller: AuthController,
-    search_rules: &meilisearch_auth::SearchRules,
+    filters: &meilisearch_auth::AuthFilter,
 ) -> Result<Stats, ResponseError> {
     let mut last_task: Option<OffsetDateTime> = None;
     let mut indexes = BTreeMap::new();
     let mut database_size = 0;
     let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
         Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
-        search_rules.authorized_indexes(),
+        filters,
     )?;
     // accumulate the size of each indexes
     let processing_index = processing_task.first().and_then(|task| task.index_uid());
-    for (name, index) in index_scheduler.indexes()? {
-        if !search_rules.is_index_authorized(&name) {
-            continue;
+    index_scheduler.try_for_each_index(|name, index| {
+        if !filters.is_index_authorized(name) {
+            return Ok(());
         }

         database_size += index.on_disk_size()?;
@@ -277,8 +283,9 @@ pub fn create_all_stats(
         let updated_at = index.updated_at(&rtxn)?;
         last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));

-        indexes.insert(name, stats);
-    }
+        indexes.insert(name.to_string(), stats);
+        Ok(())
+    })?;

     database_size += index_scheduler.size()?;
     database_size += auth_controller.size()?;
meilisearch/src/routes/multi_search.rs (new file, 122 lines)
@@ -0,0 +1,122 @@
+use actix_http::StatusCode;
+use actix_web::web::{self, Data};
+use actix_web::{HttpRequest, HttpResponse};
+use deserr::actix_web::AwebJson;
+use index_scheduler::IndexScheduler;
+use log::debug;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::actions;
+use serde::Serialize;
+
+use crate::analytics::{Analytics, MultiSearchAggregator};
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::{AuthenticationError, GuardedData};
+use crate::extractors::sequential_extractor::SeqHandler;
+use crate::search::{
+    add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
+};
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
+}
+
+#[derive(Serialize)]
+struct SearchResults {
+    results: Vec<SearchResultWithIndex>,
+}
+
+#[derive(Debug, deserr::Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct SearchQueries {
+    queries: Vec<SearchQueryWithIndex>,
+}
+
+pub async fn multi_search_with_post(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
+    params: AwebJson<SearchQueries, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let queries = params.into_inner().queries;
+
+    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
+
+    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
+    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
+    // changes.
+    let search_results: Result<_, (ResponseError, usize)> = (|| {
+        async {
+            let mut search_results = Vec::with_capacity(queries.len());
+            for (query_index, (index_uid, mut query)) in
+                queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+            {
+                debug!("multi-search #{query_index}: called with params: {:?}", query);
+
+                // Check index from API key
+                if !index_scheduler.filters().is_index_authorized(&index_uid) {
+                    return Err(AuthenticationError::InvalidToken).with_index(query_index);
+                }
+                // Apply search rules from tenant token
+                if let Some(search_rules) =
+                    index_scheduler.filters().get_index_search_rules(&index_uid)
+                {
+                    add_search_rules(&mut query, search_rules);
+                }
+
+                let index = index_scheduler
+                    .index(&index_uid)
+                    .map_err(|err| {
+                        let mut err = ResponseError::from(err);
+                        // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
+                        // here the resource not found is not part of the URL.
+                        err.code = StatusCode::BAD_REQUEST;
+                        err
+                    })
+                    .with_index(query_index)?;
+                let search_result =
+                    tokio::task::spawn_blocking(move || perform_search(&index, query))
+                        .await
+                        .with_index(query_index)?;
+
+                search_results.push(SearchResultWithIndex {
+                    index_uid: index_uid.into_inner(),
+                    result: search_result.with_index(query_index)?,
+                });
+            }
+            Ok(search_results)
+        }
+    })()
+    .await;
+
+    if search_results.is_ok() {
+        multi_aggregate.succeed();
+    }
+    analytics.post_multi_search(multi_aggregate);
+
+    let search_results = search_results.map_err(|(mut err, query_index)| {
+        // Add the query index that failed as context for the error message.
+        // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
+        // of result and we can benefit from static typing.
+        err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
+        err
+    })?;
+
+    debug!("returns: {:?}", search_results);
+
+    Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
+}
+
+/// Local `Result` extension trait to avoid `map_err` boilerplate.
+trait WithIndex {
+    type T;
+    /// convert the error type inside of the `Result` to a `ResponseError`, and return a couple of it + the usize.
+    fn with_index(self, index: usize) -> Result<Self::T, (ResponseError, usize)>;
+}
+
+impl<T, E: Into<ResponseError>> WithIndex for Result<T, E> {
+    type T = T;
+    fn with_index(self, index: usize) -> Result<T, (ResponseError, usize)> {
+        self.map_err(|err| (err.into(), index))
+    }
+}
@@ -42,7 +42,7 @@ pub async fn swap_indexes(
         }),
         Some(&req),
     );
-    let search_rules = &index_scheduler.filters().search_rules;
+    let filters = index_scheduler.filters();

     let mut swaps = vec![];
     for SwapIndexesPayload { indexes } in params.into_iter() {
@@ -53,7 +53,7 @@ pub async fn swap_indexes(
             return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
         }
     };
-    if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
+    if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
         return Err(AuthenticationError::InvalidToken.into());
     }
     swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()) });
@@ -319,7 +319,7 @@ async fn cancel_tasks(
     let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
         &index_scheduler.read_txn()?,
         &query,
-        &index_scheduler.filters().search_rules.authorized_indexes(),
+        index_scheduler.filters(),
     )?;
     let task_cancelation =
         KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
@@ -364,7 +364,7 @@ async fn delete_tasks(
     let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
         &index_scheduler.read_txn()?,
         &query,
-        &index_scheduler.filters().search_rules.authorized_indexes(),
+        index_scheduler.filters(),
     )?;
     let task_deletion =
         KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
@@ -398,10 +398,7 @@ async fn get_tasks(
     let query = params.into_query();

     let mut tasks_results: Vec<TaskView> = index_scheduler
-        .get_tasks_from_authorized_indexes(
-            query,
-            index_scheduler.filters().search_rules.authorized_indexes(),
-        )?
+        .get_tasks_from_authorized_indexes(query, index_scheduler.filters())?
         .into_iter()
         .map(|t| TaskView::from_task(&t))
         .collect();
@@ -439,12 +436,8 @@ async fn get_task(

     let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };

-    if let Some(task) = index_scheduler
-        .get_tasks_from_authorized_indexes(
-            query,
-            index_scheduler.filters().search_rules.authorized_indexes(),
-        )?
-        .first()
+    if let Some(task) =
+        index_scheduler.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?.first()
     {
         let task_view = TaskView::from_task(task);
         Ok(HttpResponse::Ok().json(task_view))
@@ -5,8 +5,10 @@ use std::time::Instant;

 use deserr::Deserr;
 use either::Either;
+use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
@@ -74,6 +76,100 @@ impl SearchQuery {
     }
 }

+/// A `SearchQuery` + an index UID.
+// This struct contains the fields of `SearchQuery` inline.
+// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
+// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
+#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct SearchQueryWithIndex {
+    #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
+    pub index_uid: IndexUid,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub q: Option<String>,
+    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
+    pub offset: usize,
+    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
+    pub limit: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
+    pub page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
+    pub hits_per_page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
+    pub attributes_to_retrieve: Option<BTreeSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
+    pub attributes_to_crop: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
+    pub crop_length: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
+    pub attributes_to_highlight: Option<HashSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
+    pub show_matches_position: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
+    pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
+    pub sort: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
+    pub facets: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
+    pub highlight_pre_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
+    pub highlight_post_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
+    pub crop_marker: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
+    pub matching_strategy: MatchingStrategy,
+}
+
+impl SearchQueryWithIndex {
+    pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
+        let SearchQueryWithIndex {
+            index_uid,
+            q,
+            offset,
+            limit,
+            page,
+            hits_per_page,
+            attributes_to_retrieve,
+            attributes_to_crop,
+            crop_length,
+            attributes_to_highlight,
+            show_matches_position,
+            filter,
+            sort,
+            facets,
+            highlight_pre_tag,
+            highlight_post_tag,
+            crop_marker,
+            matching_strategy,
+        } = self;
+        (
+            index_uid,
+            SearchQuery {
+                q,
+                offset,
+                limit,
+                page,
+                hits_per_page,
+                attributes_to_retrieve,
+                attributes_to_crop,
+                crop_length,
+                attributes_to_highlight,
+                show_matches_position,
+                filter,
+                sort,
+                facets,
+                highlight_pre_tag,
+                highlight_post_tag,
+                crop_marker,
+                matching_strategy,
+                // do not use ..Default::default() here,
+                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
+            },
+        )
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Deserr)]
 #[deserr(rename_all = camelCase)]
 pub enum MatchingStrategy {
@@ -108,7 +204,7 @@ pub struct SearchHit {
     pub matches_position: Option<MatchesPosition>,
 }

-#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Debug, Clone, PartialEq)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub hits: Vec<SearchHit>,
@@ -118,6 +214,16 @@ pub struct SearchResult {
     pub hits_info: HitsInfo,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
+}
+
+#[derive(Serialize, Debug, Clone, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResultWithIndex {
+    pub index_uid: String,
+    #[serde(flatten)]
+    pub result: SearchResult,
 }

 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
@@ -129,6 +235,32 @@ pub enum HitsInfo {
     OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
 }

+#[derive(Serialize, Debug, Clone, PartialEq)]
+pub struct FacetStats {
+    pub min: f64,
+    pub max: f64,
+}
+
+/// Incorporate search rules in search query
+pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
+    query.filter = match (query.filter.take(), rules.filter) {
+        (None, rules_filter) => rules_filter,
+        (filter, None) => filter,
+        (Some(filter), Some(rules_filter)) => {
+            let filter = match filter {
+                Value::Array(filter) => filter,
+                filter => vec![filter],
+            };
+            let rules_filter = match rules_filter {
+                Value::Array(rules_filter) => rules_filter,
+                rules_filter => vec![rules_filter],
+            };
+
+            Some(Value::Array([filter, rules_filter].concat()))
+        }
+    }
+}
+
 pub fn perform_search(
     index: &Index,
     query: SearchQuery,
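`add_search_rules` keeps both the client's filter and the tenant token's mandatory filter by wrapping them in a single top-level array, which Meilisearch combines with AND. An illustrative restatement of the merge rule on plain JSON values; the filter strings are made up:

use serde_json::{json, Value};

// Same merge rule as `add_search_rules`, restated on plain JSON values:
// both filters are kept by putting them in one top-level array (an AND).
fn merge_filters(query_filter: Option<Value>, rule_filter: Option<Value>) -> Option<Value> {
    match (query_filter, rule_filter) {
        (None, rule) => rule,
        (filter, None) => filter,
        (Some(f), Some(r)) => {
            let as_vec = |v: Value| match v {
                Value::Array(a) => a,
                other => vec![other],
            };
            Some(Value::Array([as_vec(f), as_vec(r)].concat()))
        }
    }
}

#[test]
fn tenant_filter_is_anded_with_query_filter() {
    // Made-up filters: a client filter plus a tenant-token rule.
    let merged = merge_filters(Some(json!("genre = horror")), Some(json!("user_id = 12")));
    assert_eq!(merged, Some(json!(["genre = horror", "user_id = 12"])));
}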
@@ -243,9 +375,10 @@ pub fn perform_search(
         &displayed_ids,
     );

-    let tokenizer = TokenizerBuilder::default().build();
+    let mut tokenizer_buidler = TokenizerBuilder::default();
+    tokenizer_buidler.create_char_map(true);

-    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
+    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_buidler.build());
     formatter_builder.crop_marker(query.crop_marker);
     formatter_builder.highlight_prefix(query.highlight_pre_tag);
     formatter_builder.highlight_suffix(query.highlight_post_tag);
@@ -300,7 +433,7 @@ pub fn perform_search(
         HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
     };

-    let facet_distribution = match query.facets {
+    let (facet_distribution, facet_stats) = match query.facets {
         Some(ref fields) => {
             let mut facet_distribution = index.facets_distribution(&rtxn);

@@ -314,18 +447,23 @@ pub fn perform_search(
                 facet_distribution.facets(fields);
             }
             let distribution = facet_distribution.candidates(candidates).execute()?;
-            Some(distribution)
+            let stats = facet_distribution.compute_stats()?;
+            (Some(distribution), Some(stats))
         }
-        None => None,
+        None => (None, None),
     };

+    let facet_stats = facet_stats.map(|stats| {
+        stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
+    });
+
     let result = SearchResult {
         hits: documents,
         hits_info,
         query: query.q.clone().unwrap_or_default(),
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
+        facet_stats,
     };
     Ok(result)
 }
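With `facet_stats` threaded through `perform_search`, numeric facets now carry a min/max summary alongside the distribution. A small illustrative sketch of the shape this serializes to; the facet name and numbers are invented:

use serde_json::json;

fn main() {
    // Roughly the shape the new `facetStats` section takes in a search response;
    // the "price" facet and its bounds are invented for illustration.
    let facet_stats_example = json!({
        "facetStats": {
            "price": { "min": 2.0, "max": 99.5 }
        }
    });
    println!("{facet_stats_example}");
}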
@@ -10,7 +10,8 @@ use crate::common::Server;

 pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
     Lazy::new(|| {
-        let mut authorizations = hashmap! {
+        let authorizations = hashmap! {
+            ("POST", "/multi-search") => hashset!{"search", "*"},
             ("POST", "/indexes/products/search") => hashset!{"search", "*"},
             ("GET", "/indexes/products/search") => hashset!{"search", "*"},
             ("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},
@@ -51,6 +52,7 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
             ("GET", "/stats") => hashset!{"stats.get", "stats.*", "*"},
             ("POST", "/dumps") => hashset!{"dumps.create", "dumps.*", "*"},
             ("GET", "/version") => hashset!{"version", "*"},
+            ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
             ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
             ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
             ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},
@@ -58,10 +60,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
             ("GET", "/keys") => hashset!{"keys.get", "*"},
         };

-        if cfg!(feature = "metrics") {
-            authorizations.insert(("GET", "/metrics"), hashset! {"metrics.get", "metrics.*", "*"});
-        }
-
         authorizations
     });

@@ -77,6 +75,14 @@ static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
     })
 });

+static INVALID_METRICS_RESPONSE: Lazy<Value> = Lazy::new(|| {
+    json!({"message": "The provided API key is invalid. The API key for the `/metrics` route must allow access to all indexes.",
+        "code": "invalid_api_key",
+        "type": "auth",
+        "link": "https://docs.meilisearch.com/errors#invalid_api_key"
+    })
+});
+
 const MASTER_KEY: &str = "MASTER_KEY";

 #[actix_rt::test]
@@ -204,15 +210,28 @@ async fn access_authorized_restricted_index() {

     let (response, code) = server.dummy_request(method, route).await;

-    assert_ne!(
-        response,
-        INVALID_RESPONSE.clone(),
-        "on route: {:?} - {:?} with action: {:?}",
-        method,
-        route,
-        action
-    );
-    assert_ne!(code, 403);
+    // The metrics route MUST have no limitation on the indexes
+    if *route == "/metrics" {
+        assert_eq!(
+            response,
+            INVALID_METRICS_RESPONSE.clone(),
+            "on route: {:?} - {:?} with action: {:?}",
+            method,
+            route,
+            action
+        );
+        assert_eq!(code, 403);
+    } else {
+        assert_ne!(
+            response,
+            INVALID_RESPONSE.clone(),
+            "on route: {:?} - {:?} with action: {:?}",
+            method,
+            route,
+            action
+        );
+        assert_ne!(code, 403);
+    }
 }
 }
 }
@@ -4,6 +4,8 @@ mod errors;
 mod payload;
 mod tenant_token;
+mod tenant_token_multi_search;

 use actix_web::http::StatusCode;
 use serde_json::{json, Value};

meilisearch/tests/auth/tenant_token_multi_search.rs (new file, 1141 lines)
File diff suppressed because it is too large.
@@ -103,6 +103,10 @@ impl Server {
         Index { uid: uid.as_ref().to_string(), service: &self.service, encoder }
     }

+    pub async fn multi_search(&self, queries: Value) -> (Value, StatusCode) {
+        self.service.post("/multi-search", queries).await
+    }
+
     pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
         self.service.get(format!("/indexes{parameters}")).await
     }
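A sketch of how a test could use the new helper; the index uid and the assertions are illustrative and not taken from the test files added in this comparison, but the 400 status and the "Inside `.queries[0]`" message prefix follow the multi-search handler shown above:

// Hypothetical test: a query against a missing index is reported per query index.
#[actix_rt::test]
async fn multi_search_unknown_index_is_reported_per_query() {
    let server = Server::new().await;
    // "unknown" is a made-up index uid that does not exist on the fresh server.
    let (response, code) = server
        .multi_search(json!({ "queries": [{ "indexUid": "unknown", "q": "doggo" }] }))
        .await;
    assert_eq!(code, 400);
    assert!(response["message"].as_str().unwrap().starts_with("Inside `.queries[0]`"));
}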
@@ -201,10 +205,10 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
         indexer_options: IndexerOpts {
             // memory has to be unlimited because several meilisearch are running in test context.
             max_indexing_memory: MaxMemory::unlimited(),
+            skip_index_budget: true,
             ..Parser::parse_from(None as Option<&str>)
         },
-        #[cfg(feature = "metrics")]
-        enable_metrics_route: true,
+        experimental_enable_metrics: true,
         ..Parser::parse_from(None as Option<&str>)
     }
 }
@@ -280,9 +280,9 @@ async fn replace_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 
@@ -292,9 +292,9 @@ async fn replace_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 
@@ -305,9 +305,9 @@ async fn replace_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
      "message": "csv delimiter must be an ascii character. Found: `🍰`",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 }
@@ -323,9 +323,9 @@ async fn update_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 
@@ -335,9 +335,9 @@ async fn update_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 
@@ -352,9 +352,9 @@ async fn update_documents_bad_csv_delimiter() {
     snapshot!(json_string!(response), @r###"
     {
      "message": "csv delimiter must be an ascii character. Found: `🍰`",
-      "code": "invalid_index_csv_delimiter",
+      "code": "invalid_document_csv_delimiter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_csv_delimiter"
     }
     "###);
 }
@@ -442,3 +442,37 @@ async fn displayedattr_2_smol() {
         )
         .await;
 }
+
+#[cfg(feature = "default")]
+#[actix_rt::test]
+async fn test_cjk_highlight() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = json!([
+        { "id": 0, "title": "この度、クーポンで無料で頂きました。" },
+        { "id": 1, "title": "大卫到了扫罗那里" },
+    ]);
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(
+                response["hits"][0]["_formatted"]["title"],
+                json!("この度、クーポン<em>で</em>無料<em>で</em>頂きました。")
+            );
+        })
+        .await;
+
+    index
+        .search(json!({"q": "大卫", "attributesToHighlight": ["title"]}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(
+                response["hits"][0]["_formatted"]["title"],
+                json!("<em>大卫</em>到了扫罗那里")
+            );
+        })
+        .await;
+}
@@ -3,6 +3,7 @@
 
 mod errors;
 mod formatted;
+mod multi;
 mod pagination;
 
 use once_cell::sync::Lazy;
@@ -148,6 +149,49 @@ async fn simple_search() {
         .await;
 }
 
+#[actix_rt::test]
+async fn phrase_search_with_stop_word() {
+    // related to https://github.com/meilisearch/meilisearch/issues/3521
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let (_, code) = index.update_settings(json!({"stopWords": ["the", "of"]})).await;
+    meili_snap::snapshot!(code, @"202 Accepted");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "how \"to\" train \"the" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(response["hits"].as_array().unwrap().len(), 1);
+        })
+        .await;
+}
+
+#[cfg(feature = "default")]
+#[actix_rt::test]
+async fn test_kanji_language_detection() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = json!([
+        { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+        { "id": 1, "title": "東京のお寿司。" },
+        { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
+    ]);
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "東京"}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(response["hits"].as_array().unwrap().len(), 1);
+        })
+        .await;
+}
+
 #[actix_rt::test]
 async fn search_multiple_params() {
     let server = Server::new().await;
|
343
meilisearch/tests/search/multi.rs
Normal file
343
meilisearch/tests/search/multi.rs
Normal file
@ -0,0 +1,343 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::{DOCUMENTS, NESTED_DOCUMENTS};
|
||||||
|
use crate::common::Server;
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_empty_list() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let (response, code) = server.multi_search(json!({"queries": []})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"results": []
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_json_object() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let (response, code) = server.multi_search(json!({})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Missing field `queries`",
|
||||||
|
"code": "bad_request",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_json_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let (response, code) = server.multi_search(json!([])).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value type: expected an object, but found an array: `[]`",
|
||||||
|
"code": "bad_request",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_single_index() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass"},
|
||||||
|
{"indexUid": "test", "q": "captain"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"indexUid": "test",
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"title": "Glass",
|
||||||
|
"id": "450465"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "glass",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"indexUid": "test",
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"id": "299537"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "captain",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_missing_index_uid() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"q": "glass"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
insta::assert_json_snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"message": "Missing field `indexUid` inside `.queries[0]`",
|
||||||
|
"code": "missing_index_uid",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#missing_index_uid"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_illegal_index_uid() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid": "hé", "q": "glass"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
insta::assert_json_snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||||
|
"code": "invalid_index_uid",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_two_indexes() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let index = server.index("nested");
|
||||||
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass"},
|
||||||
|
{"indexUid": "nested", "q": "pesti"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"indexUid": "test",
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"title": "Glass",
|
||||||
|
"id": "450465"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "glass",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"indexUid": "nested",
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"id": 852,
|
||||||
|
"father": "jean",
|
||||||
|
"mother": "michelle",
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "bobby",
|
||||||
|
"age": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "buddy",
|
||||||
|
"age": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"cattos": "pesti"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 654,
|
||||||
|
"father": "pierre",
|
||||||
|
"mother": "sabine",
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "gros bill",
|
||||||
|
"age": 8
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"cattos": [
|
||||||
|
"simba",
|
||||||
|
"pestiféré"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "pesti",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 2
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_one_index_doesnt_exist() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass"},
|
||||||
|
{"indexUid": "nested", "q": "pesti"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Inside `.queries[1]`: Index `nested` not found.",
|
||||||
|
"code": "index_not_found",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_multiple_indexes_dont_exist() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass"},
|
||||||
|
{"indexUid": "nested", "q": "pesti"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Inside `.queries[0]`: Index `test` not found.",
|
||||||
|
"code": "index_not_found",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_one_query_error() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let index = server.index("nested");
|
||||||
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
||||||
|
{"indexUid": "nested", "q": "pesti"},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Inside `.queries[0]`: Invalid facet distribution, this index does not have configured filterable attributes.",
|
||||||
|
"code": "invalid_search_facets",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_multiple_query_errors() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let index = server.index("nested");
|
||||||
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
let (response, code) = server
|
||||||
|
.multi_search(json!({"queries": [
|
||||||
|
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
|
||||||
|
{"indexUid": "nested", "q": "pesti", "facets": ["doggos"]},
|
||||||
|
]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Inside `.queries[0]`: Invalid facet distribution, this index does not have configured filterable attributes.",
|
||||||
|
"code": "invalid_search_facets",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
@@ -16,7 +16,7 @@ bimap = { version = "0.6.2", features = ["serde"] }
 bincode = "1.3.3"
 bstr = "1.0.1"
 byteorder = "1.4.3"
-charabia = { version = "0.7.0", default-features = false }
+charabia = { version = "0.7.1", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.6"
 deserr = "0.5.0"
@@ -59,6 +59,8 @@ pub enum InternalError {
     Utf8(#[from] str::Utf8Error),
     #[error("An indexation process was explicitly aborted.")]
     AbortedIndexation,
+    #[error("The matching words list contains at least one invalid member.")]
+    InvalidMatchingWords,
 }
 
 #[derive(Error, Debug)]
@@ -5,6 +5,7 @@ mod field_id_word_count_codec;
 mod obkv_codec;
 mod roaring_bitmap;
 mod roaring_bitmap_length;
+mod script_language_codec;
 mod str_beu32_codec;
 mod str_ref;
 mod str_str_u8_codec;
@@ -19,5 +20,6 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
 pub use self::roaring_bitmap_length::{
     BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
+pub use self::script_language_codec::ScriptLanguageCodec;
 pub use self::str_beu32_codec::StrBEU32Codec;
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
milli/src/heed_codec/script_language_codec.rs (new file, 38 lines)
@@ -0,0 +1,38 @@
+use std::borrow::Cow;
+use std::str;
+
+use charabia::{Language, Script};
+
+pub struct ScriptLanguageCodec;
+
+impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
+    type DItem = (Script, Language);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let sep = bytes.iter().position(|b| *b == 0)?;
+        let (s_bytes, l_bytes) = bytes.split_at(sep);
+        let script = str::from_utf8(s_bytes).ok()?;
+        let script_name = Script::from_name(script);
+        let lan = str::from_utf8(l_bytes).ok()?;
+        // skip '\0' byte between the two strings.
+        let lan_name = Language::from_name(&lan[1..]);
+
+        Some((script_name, lan_name))
+    }
+}
+
+impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
+    type EItem = (Script, Language);
+
+    fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
+        let script_name = script.name().as_bytes();
+        let lan_name = lan.name().as_bytes();
+
+        let mut bytes = Vec::with_capacity(script_name.len() + lan_name.len() + 1);
+        bytes.extend_from_slice(script_name);
+        bytes.push(0);
+        bytes.extend_from_slice(lan_name);
+
+        Some(Cow::Owned(bytes))
+    }
+}
@@ -4,6 +4,7 @@ use std::fs::File;
 use std::mem::size_of;
 use std::path::Path;
 
+use charabia::{Language, Script};
 use heed::flags::Flags;
 use heed::types::*;
 use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
@@ -18,7 +19,7 @@ use crate::heed_codec::facet::{
     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
     FieldIdCodec, OrderedF64Codec,
 };
-use crate::heed_codec::StrRefCodec;
+use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
 use crate::{
     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@@ -83,6 +84,7 @@ pub mod db_name {
     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
     pub const DOCUMENTS: &str = "documents";
+    pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
 }
 
 #[derive(Clone)]
@@ -122,6 +124,9 @@ pub struct Index {
     /// Maps the position of a word prefix with all the docids where this prefix appears.
     pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
 
+    /// Maps the script and language with all the docids that corresponds to it.
+    pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
+
     /// Maps the facet field id and the docids for which this field exists
     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
 
@@ -148,7 +153,7 @@ impl Index {
     ) -> Result<Index> {
         use db_name::*;
 
-        options.max_dbs(18);
+        options.max_dbs(19);
         unsafe { options.flag(Flags::MdbAlwaysFreePages) };
 
         let env = options.open(path)?;
@@ -159,6 +164,7 @@ impl Index {
         let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
         let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
         let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
+        let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
         let word_prefix_pair_proximity_docids =
             env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
         let prefix_word_pair_proximity_docids =
@@ -186,6 +192,7 @@ impl Index {
             exact_word_prefix_docids,
             docid_word_positions,
             word_pair_proximity_docids,
+            script_language_docids,
             word_prefix_pair_proximity_docids,
             prefix_word_pair_proximity_docids,
             word_position_docids,
@@ -1187,6 +1194,38 @@ impl Index {
     pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
         self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
     }
+
+    /* script language docids */
+    /// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
+    pub fn script_language_documents_ids(
+        &self,
+        rtxn: &RoTxn,
+        key: &(Script, Language),
+    ) -> heed::Result<Option<RoaringBitmap>> {
+        let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
+        let doc_ids = self.script_language_docids.get(rtxn, key)?;
+        Ok(doc_ids.map(|ids| ids - soft_deleted_documents))
+    }
+
+    pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
+        let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
+
+        let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
+        for sl in self.script_language_docids.iter(rtxn)? {
+            let ((script, language), docids) = sl?;
+
+            // keep only Languages that contains at least 1 document.
+            if !soft_deleted_documents.is_superset(&docids) {
+                if let Some(languages) = script_language.get_mut(&script) {
+                    (*languages).push(language);
+                } else {
+                    script_language.insert(script, vec![language]);
+                }
+            }
+        }
+
+        Ok(script_language)
+    }
 }
 
 #[cfg(test)]
@@ -1,5 +1,6 @@
 use std::mem::take;
 
+use heed::BytesDecode;
 use itertools::Itertools;
 use log::debug;
 use ordered_float::OrderedFloat;
@@ -7,7 +8,7 @@ use roaring::RoaringBitmap;
 
 use super::{Criterion, CriterionParameters, CriterionResult};
 use crate::facet::FacetType;
-use crate::heed_codec::facet::FacetGroupKeyCodec;
+use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
 use crate::heed_codec::ByteSliceRefCodec;
 use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
 use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
@@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>(
     Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
 }
 
+fn facet_extreme_value<'t>(
+    mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
+) -> Result<Option<f64>> {
+    let extreme_value =
+        if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
+    let (_, extreme_value) = extreme_value?;
+
+    Ok(OrderedF64Codec::bytes_decode(extreme_value))
+}
+
+pub fn facet_min_value<'t>(
+    index: &'t Index,
+    rtxn: &'t heed::RoTxn,
+    field_id: FieldId,
+    candidates: RoaringBitmap,
+) -> Result<Option<f64>> {
+    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+    let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
+    facet_extreme_value(it)
+}
+
+pub fn facet_max_value<'t>(
+    index: &'t Index,
+    rtxn: &'t heed::RoTxn,
+    field_id: FieldId,
+    candidates: RoaringBitmap,
+) -> Result<Option<f64>> {
+    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+    let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
+    facet_extreme_value(it)
+}
+
 fn facet_ordered_set_based<'t>(
     index: &'t Index,
     rtxn: &'t heed::RoTxn,
@@ -203,23 +236,24 @@ fn facet_ordered_set_based<'t>(
     is_ascending: bool,
     candidates: RoaringBitmap,
 ) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
-    let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
-
-    let number_iter = make_iter(
-        rtxn,
-        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
-        field_id,
-        candidates.clone(),
-    )?;
-
-    let string_iter = make_iter(
-        rtxn,
-        index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
-        field_id,
-        candidates,
-    )?;
-
-    Ok(Box::new(number_iter.chain(string_iter)))
+    let number_db =
+        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+    let string_db =
+        index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+
+    let (number_iter, string_iter) = if is_ascending {
+        let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
+        let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
+
+        (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
+    } else {
+        let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
+        let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
+
+        (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
+    };
+
+    Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
 }
 
 /// Returns an iterator over groups of the given candidates in ascending or descending order.
@@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX
 use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
 
 mod asc_desc;
+pub use asc_desc::{facet_max_value, facet_min_value};
 mod attribute;
 mod exactness;
 pub mod r#final;
@@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> {
         }
     }
 
+    pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
+        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
+        let filterable_fields = self.index.filterable_fields(self.rtxn)?;
+        let candidates = if let Some(candidates) = self.candidates.clone() {
+            candidates
+        } else {
+            return Ok(Default::default());
+        };
+
+        let fields = match &self.facets {
+            Some(facets) => {
+                let invalid_fields: HashSet<_> = facets
+                    .iter()
+                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
+                    .collect();
+                if !invalid_fields.is_empty() {
+                    return Err(UserError::InvalidFacetsDistribution {
+                        invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
+                        valid_facets_name: filterable_fields.into_iter().collect(),
+                    }
+                    .into());
+                } else {
+                    facets.clone()
+                }
+            }
+            None => filterable_fields,
+        };
+
+        let mut distribution = BTreeMap::new();
+        for (fid, name) in fields_ids_map.iter() {
+            if crate::is_faceted(name, &fields) {
+                let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value(
+                    self.index,
+                    self.rtxn,
+                    fid,
+                    candidates.clone(),
+                )? {
+                    min_value
+                } else {
+                    continue;
+                };
+                let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value(
+                    self.index,
+                    self.rtxn,
+                    fid,
+                    candidates.clone(),
+                )? {
+                    max_value
+                } else {
+                    continue;
+                };
+
+                distribution.insert(name.to_string(), (min_value, max_value));
+            }
+        }
+
+        Ok(distribution)
+    }
 
     pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
         let filterable_fields = self.index.filterable_fields(self.rtxn)?;
@@ -537,4 +596,216 @@ mod tests {
 
         milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
     }
+
+    #[test]
+    fn facet_stats() {
+        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
+            .unwrap();
+
+        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+
+        let mut documents = vec![];
+        for i in 0..1000 {
+            let document = serde_json::json!({
+                "colour": facet_values[i % 1000],
+            })
+            .as_object()
+            .unwrap()
+            .clone();
+            documents.push(document);
+        }
+
+        let documents = documents_batch_reader_from_objects(documents);
+
+        index.add_documents(documents).unwrap();
+
+        let txn = index.read_txn().unwrap();
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((0..1000).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((217..777).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
+    }
+
+    #[test]
+    fn facet_stats_array() {
+        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
+            .unwrap();
+
+        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+
+        let mut documents = vec![];
+        for i in 0..1000 {
+            let document = serde_json::json!({
+                "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
+            })
+            .as_object()
+            .unwrap()
+            .clone();
+            documents.push(document);
+        }
+
+        let documents = documents_batch_reader_from_objects(documents);
+
+        index.add_documents(documents).unwrap();
+
+        let txn = index.read_txn().unwrap();
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((0..1000).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((217..777).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
+    }
+
+    #[test]
+    fn facet_stats_mixed_array() {
+        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
+            .unwrap();
+
+        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+
+        let mut documents = vec![];
+        for i in 0..1000 {
+            let document = serde_json::json!({
+                "colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
+            })
+            .as_object()
+            .unwrap()
+            .clone();
+            documents.push(document);
+        }
+
+        let documents = documents_batch_reader_from_objects(documents);
+
+        index.add_documents(documents).unwrap();
+
+        let txn = index.read_txn().unwrap();
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((0..1000).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((217..777).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
+    }
+
+    #[test]
+    fn facet_mixed_values() {
+        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
+            .unwrap();
+
+        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+
+        let mut documents = vec![];
+        for i in 0..1000 {
+            let document = if i % 2 == 0 {
+                serde_json::json!({
+                    "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
+                })
+            } else {
+                serde_json::json!({
+                    "colour": format!("{}", facet_values[i % 1000] + 10000),
+                })
+            };
+            let document = document.as_object().unwrap().clone();
+            documents.push(document);
+        }
+
+        let documents = documents_batch_reader_from_objects(documents);
+
+        index.add_documents(documents).unwrap();
+
+        let txn = index.read_txn().unwrap();
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((0..1000).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);
+
+        let map = FacetDistribution::new(&txn, &index)
+            .facets(std::iter::once("colour"))
+            .candidates((217..777).into_iter().collect())
+            .compute_stats()
+            .unwrap();
+
+        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
+    }
 }
@@ -34,15 +34,20 @@ pub fn ascending_facet_sort<'t>(
     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
     field_id: u16,
     candidates: RoaringBitmap,
-) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
+) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
     let highest_level = get_highest_level(rtxn, db, field_id)?;
     if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
-        Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
+
+        Ok(itertools::Either::Left(AscendingFacetSort {
+            rtxn,
+            db,
+            field_id,
+            stack: vec![(candidates, iter)],
+        }))
     } else {
-        Ok(Box::new(std::iter::empty()))
+        Ok(itertools::Either::Right(std::iter::empty()))
     }
 }
 
@@ -60,7 +65,7 @@ struct AscendingFacetSort<'t, 'e> {
 }
 
 impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
-    type Item = Result<RoaringBitmap>;
+    type Item = Result<(RoaringBitmap, &'t [u8])>;
 
     fn next(&mut self) -> Option<Self::Item> {
         'outer: loop {
@@ -90,7 +95,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
                 *documents_ids -= &bitmap;
 
                 if level == 0 {
-                    return Some(Ok(bitmap));
+                    // Since the level is 0, the left_bound is the exact value.
+                    return Some(Ok((bitmap, left_bound)));
                 }
                 let starting_key_below =
                     FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
@@ -130,7 +136,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@@ -152,7 +158,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@@ -161,7 +167,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@@ -183,7 +189,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@@ -192,7 +198,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@@ -214,7 +220,7 @@ mod tests {
         let mut results = String::new();
         let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
            results.push('\n');
         }
@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>(
|
|||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
         let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
-        Ok(Box::new(DescendingFacetSort {
+        Ok(itertools::Either::Left(DescendingFacetSort {
             rtxn,
             db,
             field_id,
             stack: vec![(candidates, iter, Bound::Included(last_bound))],
         }))
     } else {
-        Ok(Box::new(std::iter::empty()))
+        Ok(itertools::Either::Right(std::iter::empty()))
     }
 }
 
@@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> {
 }
 
 impl<'t> Iterator for DescendingFacetSort<'t> {
-    type Item = Result<RoaringBitmap>;
+    type Item = Result<(RoaringBitmap, &'t [u8])>;
 
     fn next(&mut self) -> Option<Self::Item> {
         'outer: loop {
@@ -77,7 +77,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
                     *documents_ids -= &bitmap;
 
                     if level == 0 {
-                        return Some(Ok(bitmap));
+                        // Since we're at the level 0 the left_bound is the exact value.
+                        return Some(Ok((bitmap, left_bound)));
                     }
                     let starting_key_below =
                         FacetGroupKey { field_id, level: level - 1, left_bound };
@@ -146,7 +147,7 @@ mod tests {
         let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
         let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
@@ -169,7 +170,7 @@ mod tests {
         let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
         let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
@@ -179,7 +180,7 @@ mod tests {
 
         let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
@@ -200,7 +201,7 @@ mod tests {
         let mut results = String::new();
         let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
@@ -209,7 +210,7 @@ mod tests {
         let mut results = String::new();
         let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
@@ -231,7 +232,7 @@ mod tests {
         let mut results = String::new();
         let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
         for el in iter {
-            let docids = el.unwrap();
+            let (docids, _) = el.unwrap();
             results.push_str(&display_bitmap(&docids));
             results.push('\n');
         }
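As an aside (not part of the changeset above), here is a minimal, self-contained sketch of the technique this hunk switches to: `itertools::Either` lets one function return two different concrete iterator types behind a single return type, replacing the boxed trait object.

// Sketch: return one of two concrete iterator types from the same function
// without boxing. `Either` (re-exported by itertools from the `either` crate)
// implements Iterator whenever both of its variants do.
use itertools::Either;

fn evens_or_empty(values: &[u32], enabled: bool) -> impl Iterator<Item = u32> + '_ {
    if enabled {
        // Left variant: a real iterator borrowing the slice.
        Either::Left(values.iter().copied().filter(|v| *v % 2 == 0))
    } else {
        // Right variant: an empty iterator with a different concrete type.
        Either::Right(std::iter::empty())
    }
}

fn main() {
    let data = [1, 2, 3, 4, 5, 6];
    assert_eq!(evens_or_empty(&data, true).collect::<Vec<_>>(), vec![2, 4, 6]);
    assert_eq!(evens_or_empty(&data, false).count(), 0);
}

Compared to `Box<dyn Iterator>`, this keeps dispatch static and avoids an allocation; the trade-off is that both possible branch types are fixed at compile time.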
@@ -7,6 +7,7 @@ use std::rc::Rc;
 use charabia::Token;
 use levenshtein_automata::{Distance, DFA};
 
+use crate::error::InternalError;
 use crate::search::build_dfa;
 use crate::MAX_WORD_LENGTH;
 
@@ -31,12 +32,19 @@ impl fmt::Debug for MatchingWords {
 }
 
 impl MatchingWords {
-    pub fn new(mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>) -> Self {
+    pub fn new(
+        mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>,
+    ) -> crate::Result<Self> {
+        // if one of the matching_words vec doesn't contain a word.
+        if matching_words.iter().any(|(mw, _)| mw.is_empty()) {
+            return Err(InternalError::InvalidMatchingWords.into());
+        }
+
         // Sort word by len in DESC order prioritizing the longuest matches,
         // in order to highlight the longuest part of the matched word.
         matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len())));
 
-        Self { inner: matching_words }
+        Ok(Self { inner: matching_words })
     }
 
     /// Returns an iterator over terms that match or partially match the given token.
@@ -360,7 +368,7 @@ mod tests {
             (vec![all[2].clone()], vec![2]),
         ];
 
-        let matching_words = MatchingWords::new(matching_words);
+        let matching_words = MatchingWords::new(matching_words).unwrap();
 
         assert_eq!(
             matching_words
@@ -513,7 +513,7 @@ mod tests {
            (vec![all[2].clone()], vec![2]),
        ];
 
-        MatchingWords::new(matching_words)
+        MatchingWords::new(matching_words).unwrap()
    }
 
    impl MatcherBuilder<'_, Vec<u8>> {
@@ -600,7 +600,7 @@ mod tests {
        ];
        let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])];
 
-        let matching_words = MatchingWords::new(matching_words);
+        let matching_words = MatchingWords::new(matching_words).unwrap();
 
        let builder = MatcherBuilder::from_matching_words(matching_words);
 
@@ -847,7 +847,7 @@ mod tests {
            (vec![all[4].clone()], vec![2]),
        ];
 
-        let matching_words = MatchingWords::new(matching_words);
+        let matching_words = MatchingWords::new(matching_words).unwrap();
 
        let mut builder = MatcherBuilder::from_matching_words(matching_words);
        builder.highlight_prefix("_".to_string());
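For context, the `MatchingWords::new` change above follows the fallible-constructor pattern. The sketch below is illustrative only (toy `Phrase`/`PhraseError` types, not Meilisearch's): validate the input once at construction and return a `Result`, so every later use can rely on the invariant.

// Sketch: a constructor that rejects invalid input instead of panicking later.
#[derive(Debug)]
struct Phrase {
    words: Vec<String>,
}

#[derive(Debug)]
enum PhraseError {
    Empty,
}

impl Phrase {
    fn new(words: Vec<String>) -> Result<Self, PhraseError> {
        // Refuse an empty phrase so later code may index `words[0]` safely,
        // mirroring how the real constructor rejects empty matching words.
        if words.is_empty() {
            return Err(PhraseError::Empty);
        }
        Ok(Self { words })
    }
}

fn main() {
    assert!(Phrase::new(vec![]).is_err());
    let phrase = Phrase::new(vec!["quick".into(), "fox".into()]).unwrap();
    assert_eq!(phrase.words.len(), 2);
}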
@@ -152,6 +152,11 @@ impl<'a> Search<'a> {
             tokbuilder.stop_words(stop_words);
         }
 
+        let script_lang_map = self.index.script_language(self.rtxn)?;
+        if !script_lang_map.is_empty() {
+            tokbuilder.allow_list(&script_lang_map);
+        }
+
         let tokenizer = tokbuilder.build();
         let tokens = tokenizer.tokenize(query);
         builder
@@ -446,6 +451,28 @@ mod test {
     use super::*;
     use crate::index::tests::TempIndex;
 
+    #[cfg(feature = "default")]
+    #[test]
+    fn test_kanji_language_detection() {
+        let index = TempIndex::new();
+
+        index
+            .add_documents(documents!([
+                { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+                { "id": 1, "title": "東京のお寿司。" },
+                { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
+            ]))
+            .unwrap();
+
+        let txn = index.write_txn().unwrap();
+        let mut search = Search::new(&txn, &index);
+
+        search.query("東京");
+        let SearchResult { documents_ids, .. } = search.execute().unwrap();
+
+        assert_eq!(documents_ids, vec![1]);
+    }
+
     #[test]
     fn test_is_authorized_typos() {
         let index = TempIndex::new();
@@ -747,7 +747,7 @@ fn create_matching_words(
     let mut matching_word_cache = MatchingWordCache::default();
     let mut matching_words = Vec::new();
     ngrams(ctx, authorize_typos, query, &mut matching_words, &mut matching_word_cache, 0)?;
-    Ok(MatchingWords::new(matching_words))
+    MatchingWords::new(matching_words)
 }
 
 pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;
@@ -825,9 +825,13 @@ where
                     quoted = !quoted;
                 }
                 // if there is a quote or a hard separator we close the phrase.
-                if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard)
-                {
-                    primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
+                if quote_count > 0 || separator_kind == SeparatorKind::Hard {
+                    let phrase = mem::take(&mut phrase);
+
+                    // if the phrase only contains stop words, we don't keep it in the query.
+                    if phrase.iter().any(|w| w.is_some()) {
+                        primitive_query.push(PrimitiveQueryPart::Phrase(phrase));
+                    }
                 }
             }
             _ => (),
@@ -835,7 +839,7 @@ where
     }
 
     // If a quote is never closed, we consider all of the end of the query as a phrase.
-    if !phrase.is_empty() {
+    if phrase.iter().any(|w| w.is_some()) {
         primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
     }
 
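To make the stop-word handling above concrete, here is a small standalone sketch (assumed types; the real query tree keeps richer data): phrase tokens are stored as `Option<String>`, with `None` standing in for a stop word so positions stay aligned, and a phrase is only pushed if at least one token is an actual word.

// Sketch: a phrase made only of stop words is dropped instead of being added
// to the query, while stop words inside a mixed phrase are kept as `None` so
// the remaining words keep their relative positions.
fn close_phrase(phrase: &mut Vec<Option<String>>, query: &mut Vec<Vec<Option<String>>>) {
    let phrase = std::mem::take(phrase);
    if phrase.iter().any(|w| w.is_some()) {
        query.push(phrase);
    }
}

fn main() {
    let mut query = Vec::new();

    // "the" alone is a stop word: the whole phrase is discarded.
    let mut only_stop_words = vec![None];
    close_phrase(&mut only_stop_words, &mut query);
    assert!(query.is_empty());

    // "the quick fox": the stop word stays as a positional placeholder.
    let mut mixed = vec![None, Some("quick".to_string()), Some("fox".to_string())];
    close_phrase(&mut mixed, &mut query);
    assert_eq!(query.len(), 1);
    assert_eq!(query[0].len(), 3);
}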
@@ -30,6 +30,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
             word_position_docids,
             field_id_word_count_docids,
             word_prefix_position_docids,
+            script_language_docids,
             facet_id_f64_docids,
             facet_id_string_docids,
             facet_id_exists_docids,
@@ -82,6 +83,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         word_position_docids.clear(self.wtxn)?;
         field_id_word_count_docids.clear(self.wtxn)?;
         word_prefix_position_docids.clear(self.wtxn)?;
+        script_language_docids.clear(self.wtxn)?;
         facet_id_f64_docids.clear(self.wtxn)?;
         facet_id_exists_docids.clear(self.wtxn)?;
         facet_id_string_docids.clear(self.wtxn)?;
@@ -243,6 +243,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             facet_id_string_docids: _,
             field_id_docid_facet_f64s: _,
             field_id_docid_facet_strings: _,
+            script_language_docids,
             facet_id_exists_docids,
             documents,
         } = self.index;
@@ -499,6 +500,22 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             .execute(self.wtxn)?;
         }
 
+        // Remove the documents ids from the script language database.
+        let mut iter = script_language_docids.iter_mut(self.wtxn)?;
+        while let Some((key, mut docids)) = iter.next().transpose()? {
+            let previous_len = docids.len();
+            docids -= &self.to_delete_docids;
+            if docids.is_empty() {
+                // safety: we don't keep references from inside the LMDB database.
+                unsafe { iter.del_current()? };
+            } else if docids.len() != previous_len {
+                let key = key.to_owned();
+                // safety: we don't keep references from inside the LMDB database.
+                unsafe { iter.put_current(&key, &docids)? };
+            }
+        }
+
+        drop(iter);
         // We delete the documents ids that are under the facet field id values.
         remove_docids_from_facet_id_exists_docids(
             self.wtxn,
@@ -1166,4 +1183,52 @@ mod tests {
         stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
         stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
     }
+
+    fn stored_detected_script_and_language_should_not_return_deleted_documents_(
+        deletion_strategy: DeletionStrategy,
+    ) {
+        use charabia::{Language, Script};
+        let index = TempIndex::new();
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+                    { "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
+                    { "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
+                    { "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
+                    { "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
+                    { "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
+                ]))
+            .unwrap();
+
+        let key_cmn = (Script::Cj, Language::Cmn);
+        let cj_cmn_docs =
+            index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
+        let mut expected_cj_cmn_docids = RoaringBitmap::new();
+        expected_cj_cmn_docids.push(1);
+        expected_cj_cmn_docids.push(5);
+        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
+
+        delete_documents(&mut wtxn, &index, &["1"], deletion_strategy);
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        let cj_cmn_docs =
+            index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
+        let mut expected_cj_cmn_docids = RoaringBitmap::new();
+        expected_cj_cmn_docids.push(5);
+        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
+    }
+
+    #[test]
+    fn stored_detected_script_and_language_should_not_return_deleted_documents() {
+        stored_detected_script_and_language_should_not_return_deleted_documents_(
+            DeletionStrategy::AlwaysHard,
+        );
+        stored_detected_script_and_language_should_not_return_deleted_documents_(
+            DeletionStrategy::AlwaysSoft,
+        );
+    }
 }
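As a standalone illustration of the bitmap maintenance added above (the `roaring` crate in memory, with a `HashMap` standing in for the LMDB database): subtract the deleted document ids from every stored bitmap and drop entries that become empty.

// Sketch: remove deleted document ids from per-key RoaringBitmaps and prune
// keys whose bitmap becomes empty, mirroring the iter_mut/del_current pass.
use std::collections::HashMap;

use roaring::RoaringBitmap;

fn remove_deleted_docids(
    db: &mut HashMap<&'static str, RoaringBitmap>,
    to_delete: &RoaringBitmap,
) {
    db.retain(|_key, docids| {
        // Subtract the deleted ids in place.
        *docids -= to_delete;
        // Keep the entry only if some documents remain.
        !docids.is_empty()
    });
}

fn main() {
    let mut db: HashMap<&'static str, RoaringBitmap> = HashMap::new();
    db.insert("cj-cmn", [1u32, 5].into_iter().collect());
    db.insert("latin-eng", [0u32, 1].into_iter().collect());

    let to_delete: RoaringBitmap = [0u32, 1].into_iter().collect();
    remove_deleted_docids(&mut db, &to_delete);

    assert_eq!(db.get("cj-cmn").unwrap().len(), 1); // only document 5 is left
    assert!(!db.contains_key("latin-eng")); // became empty, so it was removed
}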
@@ -1,9 +1,9 @@
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
 use std::{io, mem, str};
 
-use charabia::{SeparatorKind, Token, TokenKind, TokenizerBuilder};
+use charabia::{Language, Script, SeparatorKind, Token, TokenKind, TokenizerBuilder};
 use roaring::RoaringBitmap;
 use serde_json::Value;
 
@@ -13,6 +13,8 @@ use crate::{
     absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
 };
 
+pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
+
 /// Extracts the word and positions where this word appear and
 /// prefixes it by the document id.
 ///
@@ -25,12 +27,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
     searchable_fields: &Option<HashSet<FieldId>>,
     stop_words: Option<&fst::Set<&[u8]>>,
     max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
+) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
     let max_positions_per_attributes = max_positions_per_attributes
         .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
     let max_memory = indexer.max_memory_by_thread();
 
     let mut documents_ids = RoaringBitmap::new();
+    let mut script_language_pair = HashMap::new();
     let mut docid_word_positions_sorter = create_sorter(
         grenad::SortAlgorithm::Stable,
         concat_u32s_array,
@@ -70,6 +73,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
                 .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
 
             for (index, token) in tokens {
+                if let Some(language) = token.language {
+                    let script = token.script;
+                    let entry = script_language_pair
+                        .entry((script, language))
+                        .or_insert_with(RoaringBitmap::new);
+                    entry.push(document_id);
+                }
                 let token = token.lemma().trim();
                 if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
                     key_buffer.truncate(mem::size_of::<u32>());
@@ -88,7 +98,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
         }
     }
 
-    sorter_into_reader(docid_word_positions_sorter, indexer).map(|reader| (documents_ids, reader))
+    sorter_into_reader(docid_word_positions_sorter, indexer)
+        .map(|reader| (documents_ids, reader, script_language_pair))
 }
 
 /// Transform a JSON value into a string that can be indexed.
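For context on the accumulation above, a small standalone sketch (toy string keys instead of charabia's `Script`/`Language` values): group document ids per detected (script, language) pair with `HashMap::entry`, creating each bitmap lazily the first time a pair is seen.

// Sketch: accumulate document ids per (script, language) pair with the entry
// API; the bitmap for a pair is created only when that pair is first seen.
use std::collections::HashMap;

use roaring::RoaringBitmap;

fn main() {
    // (document id, detected script, detected language) - toy detections.
    let detections = [
        (0u32, "Latin", "eng"),
        (1, "Cj", "cmn"),
        (3, "Cj", "jpn"),
        (5, "Cj", "cmn"),
    ];

    let mut script_language_docids: HashMap<(&str, &str), RoaringBitmap> = HashMap::new();
    for (docid, script, language) in detections {
        script_language_docids
            .entry((script, language))
            .or_insert_with(RoaringBitmap::new)
            .insert(docid);
    }

    assert_eq!(script_language_docids[&("Cj", "cmn")].len(), 2);
    assert_eq!(script_language_docids[&("Cj", "jpn")].len(), 1);
}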
@@ -257,13 +257,14 @@ fn send_and_extract_flattened_documents_data(
     let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
         rayon::join(
             || {
-                let (documents_ids, docid_word_positions_chunk) = extract_docid_word_positions(
-                    flattened_documents_chunk.clone(),
-                    indexer,
-                    searchable_fields,
-                    stop_words.as_ref(),
-                    max_positions_per_attributes,
-                )?;
+                let (documents_ids, docid_word_positions_chunk, script_language_pair) =
+                    extract_docid_word_positions(
+                        flattened_documents_chunk.clone(),
+                        indexer,
+                        searchable_fields,
+                        stop_words.as_ref(),
+                        max_positions_per_attributes,
+                    )?;
 
                 // send documents_ids to DB writer
                 let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
@@ -274,6 +275,9 @@ fn send_and_extract_flattened_documents_data(
                 let _ = lmdb_writer_sx
                     .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
 
+                let _ =
+                    lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
+
                 Ok(docid_word_positions_chunk)
             },
             || {
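The extraction above fans work out with `rayon::join`; as a reminder of its shape, a minimal example with toy closures (not the real extractors):

// Sketch: rayon::join runs two closures, potentially in parallel, and returns
// both results as a tuple - the same shape used above to run the
// word-position and facet-value extractions side by side.
fn main() {
    let documents = vec!["東京のお寿司。", "the quick brown fox"];

    let (total_chars, total_words) = rayon::join(
        || documents.iter().map(|d| d.chars().count()).sum::<usize>(),
        || documents.iter().map(|d| d.split_whitespace().count()).sum::<usize>(),
    );

    assert!(total_chars > 0);
    assert_eq!(total_words, 5);
}

`join` only returns once both closures have finished, so both extraction results are available before anything downstream runs.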
@@ -1906,6 +1906,33 @@ mod tests {
         index.add_documents(doc1).unwrap();
     }
 
+    #[cfg(feature = "default")]
+    #[test]
+    fn store_detected_script_and_language_per_document_during_indexing() {
+        use charabia::{Language, Script};
+        let index = TempIndex::new();
+        index
+            .add_documents(documents!([
+                { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+                { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
+                { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
+                { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
+                { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
+                { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
+            ]))
+            .unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        let key_jpn = (Script::Cj, Language::Jpn);
+        let key_cmn = (Script::Cj, Language::Cmn);
+        let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
+        let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
+        let expected_cj_jpn_docids = [3].iter().collect();
+        assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
+        let expected_cj_cmn_docids = [1, 5].iter().collect();
+        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
+    }
+
     #[test]
     fn add_and_delete_documents_in_single_transform() {
         let mut index = TempIndex::new();
@@ -1,8 +1,10 @@
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io;
 
+use charabia::{Language, Script};
 use grenad::MergerBuilder;
 use heed::types::ByteSlice;
 use heed::{BytesDecode, RwTxn};
@@ -38,6 +40,7 @@ pub(crate) enum TypedChunk {
     FieldIdFacetNumberDocids(grenad::Reader<File>),
     FieldIdFacetExistsDocids(grenad::Reader<File>),
     GeoPoints(grenad::Reader<File>),
+    ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }
 
 /// Write typed chunk in the corresponding LMDB database of the provided index.
@@ -210,6 +213,24 @@ pub(crate) fn write_typed_chunk_into_index(
             index.put_geo_rtree(wtxn, &rtree)?;
             index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
         }
+        TypedChunk::ScriptLanguageDocids(hash_pair) => {
+            let mut buffer = Vec::new();
+            for (key, value) in hash_pair {
+                buffer.clear();
+                let final_value = match index.script_language_docids.get(wtxn, &key)? {
+                    Some(db_values) => {
+                        let mut db_value_buffer = Vec::new();
+                        serialize_roaring_bitmap(&db_values, &mut db_value_buffer)?;
+                        let mut new_value_buffer = Vec::new();
+                        serialize_roaring_bitmap(&value, &mut new_value_buffer)?;
+                        merge_roaring_bitmaps(&new_value_buffer, &db_value_buffer, &mut buffer)?;
+                        RoaringBitmap::deserialize_from(&buffer[..])?
+                    }
+                    None => value,
+                };
+                index.script_language_docids.put(wtxn, &key, &final_value)?;
+            }
+        }
     }
 
     Ok((RoaringBitmap::new(), is_merged_database))
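A standalone sketch of the merge-on-write step above (using `roaring` bitmaps directly and a `HashMap` in place of the LMDB database): union the incoming bitmap with whatever is already stored under the same key, so repeated indexing passes accumulate document ids rather than overwrite them.

// Sketch: merge an incoming RoaringBitmap into the value already stored for
// the same key; new keys are inserted as-is, existing keys are unioned.
use std::collections::HashMap;

use roaring::RoaringBitmap;

type Key = (&'static str, &'static str);

fn write_script_language_docids(
    db: &mut HashMap<Key, RoaringBitmap>,
    incoming: HashMap<Key, RoaringBitmap>,
) {
    for (key, value) in incoming {
        let final_value = match db.get(&key) {
            // A bitmap already exists for this key: union it with the new one.
            Some(db_value) => {
                let mut merged = db_value.clone();
                merged |= &value;
                merged
            }
            // First time this key is seen: store the new bitmap as-is.
            None => value,
        };
        db.insert(key, final_value);
    }
}

fn main() {
    let mut db = HashMap::new();
    db.insert(("Cj", "cmn"), [1u32].into_iter().collect::<RoaringBitmap>());

    let mut incoming = HashMap::new();
    incoming.insert(("Cj", "cmn"), [5u32].into_iter().collect::<RoaringBitmap>());
    incoming.insert(("Cj", "jpn"), [3u32].into_iter().collect::<RoaringBitmap>());

    write_script_language_docids(&mut db, incoming);
    assert_eq!(db.get(&("Cj", "cmn")).unwrap().len(), 2); // {1, 5}
    assert_eq!(db.get(&("Cj", "jpn")).unwrap().len(), 1); // {3}
}

The hunk above performs the same union through `serialize_roaring_bitmap` and `merge_roaring_bitmaps` so it can reuse the existing helpers that operate on serialized bitmap values.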
@@ -11,6 +11,7 @@ pub struct IndexerConfig {
     pub chunk_compression_level: Option<u32>,
     pub thread_pool: Option<ThreadPool>,
     pub max_positions_per_attributes: Option<u32>,
+    pub skip_index_budget: bool,
 }
 
 impl Default for IndexerConfig {
@@ -24,6 +25,7 @@ impl Default for IndexerConfig {
             chunk_compression_level: None,
             thread_pool: None,
             max_positions_per_attributes: None,
+            skip_index_budget: false,
         }
     }
 }
@@ -32,15 +32,6 @@ fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
     let result = search.execute().unwrap();
     // 1 document should match
     assert_eq!(result.documents_ids.len(), 1);
-
-    // test for a single stop word only, no other search terms
-    let mut search = Search::new(&txn, &index);
-    search.query("\"the\"");
-    search.limit(10);
-    search.authorize_typos(false);
-    search.terms_matching_strategy(TermsMatchingStrategy::All);
-    let result = search.execute().unwrap();
-    assert_eq!(result.documents_ids.len(), 0);
 }
 
 #[test]