Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-18 12:20:48 +00:00)

Compare commits: hackaton-r ... prototype- (264 commits)
SHA1
fa27bf3513
09ebf7428b
76e0248cdb
7f0abaf582
6f135457f8
41f3a30b0b
9090e0fe9d
b3098b9d9a
9b1de777de
00f0711207
5751f5c640
3d23b388bc
85626cff8e
b366acdae6
7cb7e37ba8
33b7c574ea
d3575fb028
39cbb499c2
ebef6bc24d
d59b7db8d0
263e825619
69354a6144
b0adc73ce6
2b5d9042d1
5b57fbab08
72d3fa4898
772964125d
378deb0bef
1f36410541
b11f85a635
a2d6dc8571
ee1701157f
8c649d8061
492fc086f0
a2d0c73b41
264b10ec20
825257da76
f8289cd974
3053e01c05
b11c2afac0
9cef800b2a
db2fb86b8b
882ab9cc85
5a9c96e1db
70ce40828c
688266c83e
6dab826908
1e2fbc6a42
523519fdbf
ef6fa10f7a
620fee35f9
cbaa54cafd
1bccf2079e
1b2ea6cf19
1ad1fcc8c8
48865470d7
c810df4d9f
87610a5f98
2544bc1416
ff522c919d
1c39459cf4
bf0651f23c
5b20e625f3
bc51d6157a
1b4ff991c0
4b64c33aa2
12323d610e
44e9033b3a
4d864f0702
5e3df76699
02765fb267
841165d529
ea4a266f08
49f069ed97
be16b99d40
ec0c09d17c
a9230f6e6c
b10c060bf7
e507ef5932
c71b1d33ae
0fc446c62f
0fb6acefc3
b1d1355b69
f19332466e
03ddb4f310
c855cc2721
da0503ef80
54f0ee1ed2
94206b0055
b40253bf18
d8bf3f3fc2
9d59e8011a
dad78cbf8d
4e91707a06
de10f20732
ce5647e730
b57b818b67
f7ea94e5f4
be395c7944
9fedd8101a
54d07a8da3
53382bb1b8
5b004a2583
13416ccbf7
58690dfb19
abf424ebfc
dfab6293c9
fdf3f7f627
6260cff65f
8e0d9c9a5e
ae4ec8ea55
652ac3052d
9a2dccc3bc
a35988550c
e78281785c
3c15881818
73c06d31d9
290e773d23
fa6c7f65ca
113527f466
c534a1b687
2263dff02b
d651b3ef01
762b0b47e6
01d5eedf2f
073f89db79
8370fbc92b
85f42fbc03
c6b3c18c85
bafeb892a7
8fb221dae3
5be569e3e2
946c762d28
cda6ca1ee6
696fcf4d18
476e4d3dbe
576fa9c6da
77dcbff6b2
544440c363
a3dae4db9b
ba90a5ec0e
b26dc9aabe
66abac9364
59f88c14b3
14832cb324
04ec293024
f67ff3a738
560e8f5613
2d3f15f82c
40186bf403
87e3d27878
6bcf8b4f8c
46aa75abdb
2597bbd107
e2bc054604
fcd3a1434d
a82dee21e0
bc45c1206d
6ae4100f07
0c47defeee
313b16bec2
1dd97578a8
f5ef69293b
1c5705c164
66c2c82a18
28a8d0ccda
96be85396d
df9e5c8651
b541d48847
8ccf32d1a0
db1ca21231
11ea5acff9
8d77736a67
748b333161
17b647dfe5
2614e7d9ca
e7244aa485
9cacc82307
4c6fddb1cb
62ea81bef6
f28f09ae2f
ca52021079
ee6f79d60b
e4c24ca6a3
2bae9550c8
32c78ac8b1
5fe7c4545a
2042229927
eae9eab181
cf8dad1ca0
dd619913da
9b55ff16e9
e761db582f
d8c649b3cd
5e0485d8dd
27eec21415
62cc97ba70
fed59cc1d5
2b3adef796
956cfc5487
12fc878640
0a2e8b92a9
c7a3f80de6
029d4de043
549f1bcccf
689ec7c7ad
3655d4bdca
055ca3935b
1b8871a585
bf8fac6676
f2a9e1ebbb
c45c6cf54c
513e61e9a3
90a626bf80
0d4acf2daa
58db8d85ec
62dfd09dc6
656dadabea
c5f7893fbb
8cf2ccf168
0913373a5e
1a7f1282af
bc747aac3a
be92376ab3
cf7e355735
5f09d89ad1
6ecb26a3f8
76c6f554d6
f343ef5f2f
96982a768a
fca78fbc46
67a678cfb6
d1331d8abf
19ba129165
d4da06ff47
3e0471edae
432df03c4c
11958016dd
63c250a04d
06d8cd5b72
c0f2724c2d
d772073dfa
8fe8ddea79
8a95bf28e5
c0fd3dffb8
c42fd5375f
b418c3a756
1cde455758
ca19bae72f
705878ff59
92c280d1c8
181e7a1e53
2e5abb4d2c
44aaf5d9e3
ff0ababf65
c5336af1c5
1567758a56
37953afe1a
43989fe2e4
de3f992ae4
c668a29ed5
98f0618065
b10eeb0e41
4a8515e9fc
.github/ISSUE_TEMPLATE/sprint_issue.md (8 changes, vendored)

@@ -7,19 +7,17 @@ assignees: ''
 ---
 
-Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
+Related product team resources: [PRD]() (_internal only_)
 Related product discussion:
-Related spec: WIP
 
 ## Motivation
 
-<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
+<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
 
 ## Usage
 
-<!---Write a quick description of the usage if the usage has already been defined-->
-
-Refer to the final spec to know the details and the final decisions about the usage.
+<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
 
 ## TODO
.github/workflows/benchmarks-manual.yml (2 changes, vendored)

@@ -74,4 +74,4 @@ jobs:
           echo "${{ steps.file.outputs.basename }}.json has just been pushed."
           echo 'How to compare this benchmark with another one?'
           echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
-          echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
+          echo " - Run the following command: ./benchmaks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
.github/workflows/benchmarks-pr.yml (98 changes, vendored, new file)

@@ -0,0 +1,98 @@
+name: Benchmarks (PR)
+on: issue_comment
+permissions:
+  issues: write
+
+env:
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    if: startsWith(github.event.comment.body, '/benchmark')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 4320 # 72h
+    steps:
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: benchmark
+          reaction-type: "eyes"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      # Set variables
+      - name: Set current branch name
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
+        id: current_branch
+      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
+        id: normalized_current_branch
+      - name: Set shorter commit SHA
+        shell: bash
+        run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
+        id: commit_sha
+      - name: Set file basename with format "dataset_branch_commitSHA"
+        shell: bash
+        run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
+        id: file
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
+        run: |
+          cd benchmarks
+          cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
+
+      # Generate critcmp files
+      - name: Install critcmp
+        uses: taiki-e/install-action@v2
+        with:
+          tool: critcmp
+      - name: Export cripcmp file
+        run: |
+          critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
+
+      # Upload benchmarks
+      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
+        uses: BetaHuhn/do-spaces-action@v2
+        with:
+          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
+          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
+          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
+          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
+          source: ${{ steps.file.outputs.basename }}.json
+          out_dir: critcmp_results
+
+      # Compute the diff of the benchmarks and send a message on the GitHub PR
+      - name: Compute and send a message in the PR
+        env:
+          GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+        run: |
+          set -x
+          export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
+          export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
+          export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
+          echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
+          echo '```' >> body.txt
+          ./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
+          echo '```' >> body.txt
+          gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt
.github/workflows/publish-apt-brew-pkg.yml (2 changes, vendored)

@@ -50,7 +50,7 @@ jobs:
     needs: check-version
     steps:
      - name: Create PR to Homebrew
-       uses: mislav/bump-homebrew-formula-action@v2
+       uses: mislav/bump-homebrew-formula-action@v3
       with:
         formula-name: meilisearch
         formula-path: Formula/m/meilisearch.rb
.github/workflows/publish-docker-images.yml (10 changes, vendored)

@@ -57,20 +57,20 @@ jobs:
          echo "date=$commit_date" >> $GITHUB_OUTPUT

      - name: Set up QEMU
-       uses: docker/setup-qemu-action@v2
+       uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
-       uses: docker/setup-buildx-action@v2
+       uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
-       uses: docker/login-action@v2
+       uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Docker meta
        id: meta
-       uses: docker/metadata-action@v4
+       uses: docker/metadata-action@v5
        with:
          images: getmeili/meilisearch
          # Prevent `latest` to be updated for each new tag pushed.

@@ -83,7 +83,7 @@ jobs:
            type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}

      - name: Build and push
-       uses: docker/build-push-action@v4
+       uses: docker/build-push-action@v5
        with:
          push: true
          platforms: linux/amd64,linux/arm64
.github/workflows/sdks-tests.yml (4 changes, vendored)

@@ -160,7 +160,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
-       uses: actions/setup-node@v3
+       uses: actions/setup-node@v4
        with:
          cache: 'yarn'
      - name: Install dependencies

@@ -318,7 +318,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js-plugins
      - name: Setup node
-       uses: actions/setup-node@v3
+       uses: actions/setup-node@v4
        with:
          cache: yarn
      - name: Install dependencies
|
10
.github/workflows/test-suite.yml
vendored
10
.github/workflows/test-suite.yml
vendored
@ -43,7 +43,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -149,7 +149,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -168,7 +168,7 @@ jobs:
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -187,7 +187,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
Cargo.lock (967 changes, generated): file diff suppressed because it is too large.
Cargo.toml

@@ -2,6 +2,7 @@
 resolver = "2"
 members = [
     "meilisearch",
+    "meilitool",
     "meilisearch-types",
     "meilisearch-auth",
     "meili-snap",

@@ -18,7 +19,7 @@ members = [
 ]

 [workspace.package]
-version = "1.4.0"
+version = "1.5.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
Dockerfile (11 changes)

@@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler

 RUN apk add -q --update-cache --no-cache build-base openssl-dev

-WORKDIR /meilisearch
+WORKDIR /

 ARG COMMIT_SHA
 ARG COMMIT_DATE

@@ -17,7 +17,7 @@ RUN set -eux; \
     if [ "$apkArch" = "aarch64" ]; then \
        export JEMALLOC_SYS_WITH_LG_PAGE=16; \
     fi && \
-    cargo build --release
+    cargo build --release -p meilisearch -p meilitool

 # Run
 FROM alpine:3.16

@@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
 RUN apk update --quiet \
     && apk add -q --no-cache libgcc tini curl

-# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
-# to find.
-COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
+# and it's easy to find.
+COPY --from=compiler /target/release/meilisearch /bin/meilisearch
+COPY --from=compiler /target/release/meilitool /bin/meilitool
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN ln -s /bin/meilisearch /meilisearch
PROFILING.md

@@ -1,14 +1,14 @@
 # Profiling Meilisearch

-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).

 ![An example profiling with Puffin](assets/profiling-example.png)

 ## Profiling the Indexing Process

-When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.

-Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
+[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.

 Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
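As orientation for the change above, here is a minimal, self-contained sketch of the Puffin instrumentation pattern that the `IndexScheduler::tick` frame relies on. It is illustrative only, not Meilisearch's actual integration code, and assumes a `puffin` dependency:

```rust
// Profiling is off by default; nothing is recorded until scopes are on.
fn tick() {
    puffin::profile_function!(); // a scope named after the enclosing function

    {
        puffin::profile_scope!("process_batch"); // a named sub-scope
        std::thread::sleep(std::time::Duration::from_millis(10));
    }

    // Closing the frame publishes everything recorded since the previous
    // frame to consumers such as Puffin Viewer or an exported report.
    puffin::GlobalProfiler::lock().new_frame();
}

fn main() {
    puffin::set_scopes_on(true);
    tick();
}
```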
benchmarks/benches/indexing.rs

@@ -6,9 +6,7 @@ use std::path::Path;

 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::heed::{EnvOpenOptions, RwTxn};
-use milli::update::{
-    DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
-};
+use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
 use milli::Index;
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;

@@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-               let mut wtxn = index.write_txn().unwrap();
-
-               for ids in document_ids_to_delete {
-                   let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                   builder.delete_documents(&ids);
-                   builder.execute().unwrap();
-               }
-
-               wtxn.commit().unwrap();
-
-               index.prepare_for_closing().wait();
+               delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });

@@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-               let mut wtxn = index.write_txn().unwrap();
-
-               for ids in document_ids_to_delete {
-                   let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                   builder.delete_documents(&ids);
-                   builder.execute().unwrap();
-               }
-
-               wtxn.commit().unwrap();
-
-               index.prepare_for_closing().wait();
+               delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });

@@ -875,22 +853,31 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-               let mut wtxn = index.write_txn().unwrap();
-
-               for ids in document_ids_to_delete {
-                   let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                   builder.delete_documents(&ids);
-                   builder.execute().unwrap();
-               }
-
-               wtxn.commit().unwrap();
-
-               index.prepare_for_closing().wait();
+               delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
 }

+fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
+    let mut wtxn = index.write_txn().unwrap();
+
+    let indexer_config = IndexerConfig::default();
+    for ids in document_ids_to_delete {
+        let config = IndexDocumentsConfig::default();
+
+        let mut builder =
+            IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
+                .unwrap();
+        (builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
+        builder.execute().unwrap();
+    }
+
+    wtxn.commit().unwrap();
+
+    index.prepare_for_closing().wait();
+}
+
 fn indexing_movies_in_three_batches(c: &mut Criterion) {
     let mut group = c.benchmark_group("indexing");
     group.sample_size(BENCHMARK_ITERATION);

@@ -1112,17 +1099,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-               let mut wtxn = index.write_txn().unwrap();
-
-               for ids in document_ids_to_delete {
-                   let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                   builder.delete_documents(&ids);
-                   builder.execute().unwrap();
-               }
-
-               wtxn.commit().unwrap();
-
-               index.prepare_for_closing().wait();
+               delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });

@@ -1338,17 +1315,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-               let mut wtxn = index.write_txn().unwrap();
-
-               for ids in document_ids_to_delete {
-                   let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                   builder.delete_documents(&ids);
-                   builder.execute().unwrap();
-               }
-
-               wtxn.commit().unwrap();
-
-               index.prepare_for_closing().wait();
+               delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
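The rewritten closures above follow Criterion's `iter_batched` shape: the setup closure builds the index and the ids outside the timed section, and only the routine is measured, which is why the change can swap the whole routine body for a single call to the new `delete_documents_from_ids` helper. A standalone sketch of that shape, with `expensive_setup` and `delete_all` as hypothetical stand-ins:

```rust
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};

// Stand-in for building an index and the ids to delete (untimed setup).
fn expensive_setup() -> Vec<u32> {
    (0..1_000).collect()
}

// Stand-in for the measured routine, like `delete_documents_from_ids`.
fn delete_all(ids: Vec<u32>) {
    std::hint::black_box(ids.len());
}

fn deleting_in_batches(c: &mut Criterion) {
    c.bench_function("deleting in batches", |b| {
        // Only `delete_all` is timed; `expensive_setup` rebuilds the
        // input before every iteration.
        b.iter_batched(expensive_setup, delete_all, BatchSize::PerIteration)
    });
}

criterion_group!(benches, deleting_in_batches);
criterion_main!(benches);
```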
dump/src/reader/mod.rs

@@ -526,12 +526,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-       insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:22.688964637Z",
+         "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@@ -541,12 +541,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-       insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:22.197788495Z",
+         "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@@ -571,12 +571,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-       insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:24.242683494Z",
+         "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@@ -617,12 +617,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-       insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:56.595257Z",
+         "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@@ -632,12 +632,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-       insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:56.192178Z",
+         "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@@ -647,12 +647,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-       insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:58.876405Z",
+         "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);
Three insta snapshot files under dump/src/reader/ were deleted:

Deleted snapshot (expression: spells.settings().unwrap()):

@@ -1,24 +0,0 @@
----
-source: dump/src/reader/mod.rs
-expression: spells.settings().unwrap()
----
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}

Deleted snapshot (expression: products.settings().unwrap()):

@@ -1,38 +0,0 @@
----
-source: dump/src/reader/mod.rs
-expression: products.settings().unwrap()
----
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {
-    "android": [
-      "phone",
-      "smartphone"
-    ],
-    "iphone": [
-      "phone",
-      "smartphone"
-    ],
-    "phone": [
-      "android",
-      "iphone",
-      "smartphone"
-    ]
-  },
-  "distinctAttribute": null
-}

Deleted snapshot (expression: movies.settings().unwrap()):

@@ -1,31 +0,0 @@
----
-source: dump/src/reader/mod.rs
-expression: movies.settings().unwrap()
----
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [
-    "genres",
-    "id"
-  ],
-  "sortableAttributes": [
-    "genres",
-    "id"
-  ],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness",
-    "release_date:asc"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
dump/src/reader/v2/mod.rs

@@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;

 pub type Task = updates::UpdateEntry;
+pub type Kind = updates::UpdateMeta;

 // everything related to the errors
 pub type ResponseError = errors::ResponseError;

@@ -107,8 +108,11 @@ impl V2Reader {
     pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
         Ok(self.index_uuid.iter().map(|index| -> Result<_> {
             V2IndexReader::new(
-                index.uid.clone(),
                 &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
+                index,
+                BufReader::new(
+                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
+                ),
             )
         }))
     }

@@ -143,16 +147,41 @@ pub struct V2IndexReader {
 }

 impl V2IndexReader {
-    pub fn new(name: String, path: &Path) -> Result<Self> {
+    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
         let meta = File::open(path.join("meta.json"))?;
         let meta: DumpMeta = serde_json::from_reader(meta)?;

+        let mut created_at = None;
+        let mut updated_at = None;
+
+        for line in tasks.lines() {
+            let task: Task = serde_json::from_str(&line?)?;
+            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
+                continue;
+            }
+
+            let new_created_at = match task.update.meta() {
+                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
+                _ => None,
+            };
+            let new_updated_at = task.update.finished_at();
+
+            if created_at.is_none() || created_at > new_created_at {
+                created_at = new_created_at;
+            }
+
+            if updated_at.is_none() || updated_at < new_updated_at {
+                updated_at = new_updated_at;
+            }
+        }
+
+        let current_time = OffsetDateTime::now_utc();
+
         let metadata = IndexMetadata {
-            uid: name,
+            uid: index_uuid.uid.clone(),
             primary_key: meta.primary_key,
-            // FIXME: Iterate over the whole task queue to find the creation and last update date.
-            created_at: OffsetDateTime::now_utc(),
-            updated_at: OffsetDateTime::now_utc(),
+            created_at: created_at.unwrap_or(current_time),
+            updated_at: updated_at.unwrap_or(current_time),
         };

         let ret = V2IndexReader {

@@ -248,12 +277,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-       insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:22.688964637Z",
+         "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@@ -263,12 +292,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-       insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:22.197788495Z",
+         "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@@ -293,12 +322,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-       insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2022-10-09T20:27:24.242683494Z",
+         "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@@ -340,12 +369,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-       insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:56.595257Z",
+         "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@@ -355,12 +384,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-       insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:56.192178Z",
+         "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@@ -370,12 +399,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-       insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+       insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-         "createdAt": "[now]",
-         "updatedAt": "[now]"
+         "createdAt": "2023-01-30T16:25:58.876405Z",
+         "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);
dump/src/reader/v2/updates.rs

@@ -227,4 +227,14 @@ impl UpdateStatus {
             _ => None,
         }
     }
+
+    pub fn finished_at(&self) -> Option<OffsetDateTime> {
+        match self {
+            UpdateStatus::Processing(_) => None,
+            UpdateStatus::Enqueued(_) => None,
+            UpdateStatus::Processed(u) => Some(u.processed_at),
+            UpdateStatus::Aborted(_) => None,
+            UpdateStatus::Failed(u) => Some(u.failed_at),
+        }
+    }
 }
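The created/updated computation in `V2IndexReader::new` above is a plain min/max fold over optional timestamps, where `None` means 'no qualifying task seen yet'. A self-contained sketch of the same logic, using `u64` seconds as a stand-in for `OffsetDateTime`:

```rust
// Stand-in event: did this task count toward the index's creation date,
// and when did it finish (u64 seconds instead of `OffsetDateTime`)?
struct Finished {
    counts_for_creation: bool,
    finished_at: u64,
}

fn creation_and_update_times(tasks: &[Finished]) -> (Option<u64>, Option<u64>) {
    let mut created_at: Option<u64> = None;
    let mut updated_at: Option<u64> = None;

    for task in tasks {
        if task.counts_for_creation
            && (created_at.is_none() || created_at > Some(task.finished_at))
        {
            created_at = Some(task.finished_at); // earliest qualifying task
        }
        if updated_at.is_none() || updated_at < Some(task.finished_at) {
            updated_at = Some(task.finished_at); // latest finished task
        }
    }

    (created_at, updated_at)
}

fn main() {
    let tasks = [
        Finished { counts_for_creation: true, finished_at: 30 },
        Finished { counts_for_creation: false, finished_at: 10 },
        Finished { counts_for_creation: true, finished_at: 20 },
    ];
    // Creation ignores the non-qualifying task; update does not.
    assert_eq!(creation_and_update_times(&tasks), (Some(20), Some(30)));
}
```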
index-scheduler/Cargo.toml

@@ -18,11 +18,12 @@ derive_builder = "0.12.0"
 dump = { path = "../dump" }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
+flate2 = "1.0.28"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
-puffin = "0.16.0"
+puffin = { version = "0.16.0", features = ["serialization"] }
 roaring = { version = "0.10.1", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }

@@ -30,6 +31,7 @@ synchronoise = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+ureq = "2.9.1"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }

 [dev-dependencies]
index-scheduler/src/batch.rs

@@ -19,18 +19,18 @@ one indexing operation.

 use std::collections::{BTreeSet, HashSet};
 use std::ffi::OsStr;
+use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;

 use dump::IndexMetadata;
-use log::{debug, error, info};
+use log::{debug, error, info, trace};
 use meilisearch_types::error::Code;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::update::{
-    DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
-    Settings as MilliSettings,
+    IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
 };
 use meilisearch_types::milli::{self, Filter, BEU32};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};

@@ -43,7 +43,7 @@ use uuid::Uuid;

 use crate::autobatcher::{self, BatchKind};
 use crate::utils::{self, swap_index_uid_in_task};
-use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
+use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};

 /// Represents a combination of tasks that can all be processed at the same time.
 ///

@@ -104,12 +104,6 @@ pub(crate) enum IndexOperation {
         operations: Vec<DocumentOperation>,
         tasks: Vec<Task>,
     },
-    DocumentDeletion {
-        index_uid: String,
-        // The vec associated with each document deletion tasks.
-        documents: Vec<Vec<String>>,
-        tasks: Vec<Task>,
-    },
     IndexDocumentDeletionByFilter {
         index_uid: String,
         task: Task,

@@ -161,7 +155,6 @@ impl Batch {
             }
             Batch::IndexOperation { op, .. } => match op {
                 IndexOperation::DocumentOperation { tasks, .. }
-                | IndexOperation::DocumentDeletion { tasks, .. }
                 | IndexOperation::Settings { tasks, .. }
                 | IndexOperation::DocumentClear { tasks, .. } => {
                     tasks.iter().map(|task| task.uid).collect()

@@ -199,11 +192,33 @@ impl Batch {
     }
 }

+impl fmt::Display for Batch {
+    /// A text used when we debug the profiling reports.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let index_uid = self.index_uid();
+        let tasks = self.ids();
+        match self {
+            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
+            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
+            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
+            Batch::Dump(_) => f.write_str("Dump")?,
+            Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
+            Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
+            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
+            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
+            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
+        };
+        match index_uid {
+            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
+            None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
+        }
+    }
+}
+
 impl IndexOperation {
     pub fn index_uid(&self) -> &str {
         match self {
             IndexOperation::DocumentOperation { index_uid, .. }
-            | IndexOperation::DocumentDeletion { index_uid, .. }
             | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
             | IndexOperation::DocumentClear { index_uid, .. }
             | IndexOperation::Settings { index_uid, .. }

@@ -213,6 +228,27 @@ impl IndexOperation {
     }
 }

+impl fmt::Display for IndexOperation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            IndexOperation::DocumentOperation { .. } => {
+                f.write_str("IndexOperation::DocumentOperation")
+            }
+            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
+                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
+            }
+            IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
+            IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
+            IndexOperation::DocumentClearAndSetting { .. } => {
+                f.write_str("IndexOperation::DocumentClearAndSetting")
+            }
+            IndexOperation::SettingsAndDocumentOperation { .. } => {
+                f.write_str("IndexOperation::SettingsAndDocumentOperation")
+            }
+        }
+    }
+}
+
 impl IndexScheduler {
     /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
     ///

@@ -300,18 +336,27 @@ impl IndexScheduler {
             BatchKind::DocumentDeletion { deletion_ids } => {
                 let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;

-                let mut documents = Vec::new();
+                let mut operations = Vec::with_capacity(tasks.len());
+                let mut documents_counts = Vec::with_capacity(tasks.len());
                 for task in &tasks {
                     match task.kind {
                         KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-                            documents.push(documents_ids.clone())
+                            operations.push(DocumentOperation::Delete(documents_ids.clone()));
+                            documents_counts.push(documents_ids.len() as u64);
                         }
                         _ => unreachable!(),
                     }
                 }

                 Ok(Some(Batch::IndexOperation {
-                    op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
+                    op: IndexOperation::DocumentOperation {
+                        index_uid,
+                        primary_key: None,
+                        method: IndexDocumentsMethod::ReplaceDocuments,
+                        documents_counts,
+                        operations,
+                        tasks,
+                    },
                     must_create_index,
                 }))
             }

@@ -581,7 +626,7 @@ impl IndexScheduler {
             self.breakpoint(crate::Breakpoint::InsideProcessBatch);
         }

-        puffin::profile_function!(format!("{:?}", batch));
+        puffin::profile_function!(batch.to_string());

         match batch {
             Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {

@@ -777,6 +822,10 @@ impl IndexScheduler {
                 // 2. dump the tasks
                 let mut dump_tasks = dump.create_tasks_queue()?;
                 for ret in self.all_tasks.iter(&rtxn)? {
+                    if self.must_stop_processing.get() {
+                        return Err(Error::AbortedTask);
+                    }
+
                     let (_, mut t) = ret?;
                     let status = t.status;
                     let content_file = t.content_uuid();

@@ -797,6 +846,9 @@ impl IndexScheduler {
                     // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
                     if let Some(content_file) = content_file {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                         if status == Status::Enqueued {
                             let content_file = self.file_store.get_update(content_file)?;

@@ -836,6 +888,9 @@ impl IndexScheduler {
                     // 3.1. Dump the documents
                     for ret in index.all_documents(&rtxn)? {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                         let (_id, doc) = ret?;
                         let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
                         index_dumper.push_document(&document)?;

@@ -848,13 +903,16 @@ impl IndexScheduler {
                 })?;

                 // 4. Dump experimental feature settings
-                let features = self.features()?.runtime_features();
+                let features = self.features().runtime_features();
                 dump.create_experimental_features(features)?;

                 let dump_uid = started_at.format(format_description!(
                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
                 )).unwrap();

+                if self.must_stop_processing.get() {
+                    return Err(Error::AbortedTask);
+                }
                 let path = self.dumps_path.join(format!("{}.dump", dump_uid));
                 let file = File::create(path)?;
                 dump.persist_to(BufWriter::new(file))?;

@@ -875,6 +933,10 @@ impl IndexScheduler {
                     self.index_mapper.index(&rtxn, &index_uid)?
                 };

+                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
+                *self.currently_updating_index.write().unwrap() =
+                    Some((index_uid.clone(), index.clone()));
+
                 let mut index_wtxn = index.write_txn()?;
                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
                 index_wtxn.commit()?;

@@ -1143,7 +1205,7 @@ impl IndexScheduler {
                     index,
                     indexer_config,
                     config,
-                    |indexing_step| debug!("update: {:?}", indexing_step),
+                    |indexing_step| trace!("update: {:?}", indexing_step),
                     || must_stop_processing.get(),
                 )?;

@@ -1190,7 +1252,8 @@ impl IndexScheduler {
                             let (new_builder, user_result) =
                                 builder.remove_documents(document_ids)?;
                             builder = new_builder;
-
+                            // Uses Invariant: remove documents actually always returns Ok for the inner result
+                            let count = user_result.unwrap();
                             let provided_ids =
                                 if let Some(Details::DocumentDeletion { provided_ids, .. }) =
                                     task.details

@@ -1201,23 +1264,11 @@ impl IndexScheduler {
                                 unreachable!();
                             };

-                            match user_result {
-                                Ok(count) => {
-                                    task.status = Status::Succeeded;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(count),
-                                    });
-                                }
-                                Err(e) => {
-                                    task.status = Status::Failed;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(0),
-                                    });
-                                    task.error = Some(milli::Error::from(e).into());
-                                }
-                            }
+                            task.status = Status::Succeeded;
+                            task.details = Some(Details::DocumentDeletion {
+                                provided_ids,
+                                deleted_documents: Some(count),
+                            });
                         }
                     }
                 }

@@ -1232,31 +1283,13 @@ impl IndexScheduler {
                     milli::update::Settings::new(index_wtxn, index, indexer_config);
                 builder.reset_primary_key();
                 builder.execute(
-                    |indexing_step| debug!("update: {:?}", indexing_step),
+                    |indexing_step| trace!("update: {:?}", indexing_step),
                     || must_stop_processing.clone().get(),
                 )?;
                 }

                 Ok(tasks)
             }
-            IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
-                let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
-                documents.iter().flatten().for_each(|id| {
-                    builder.delete_external_id(id);
-                });
-
-                let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
-
-                for (task, documents) in tasks.iter_mut().zip(documents) {
-                    task.status = Status::Succeeded;
-                    task.details = Some(Details::DocumentDeletion {
-                        provided_ids: documents.len(),
-                        deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
-                    });
-                }
-
-                Ok(tasks)
-            }
             IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
                 let filter =
                     if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =

@@ -1266,7 +1299,13 @@ impl IndexScheduler {
                     } else {
                         unreachable!()
                     };
-                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
+                let deleted_documents = delete_document_by_filter(
+                    index_wtxn,
+                    filter,
+                    self.index_mapper.indexer_config(),
+                    self.must_stop_processing.clone(),
+                    index,
+                );
                 let original_filter = if let Some(Details::DocumentDeletionByFilter {
                     original_filter,
                     deleted_documents: _,

@@ -1500,6 +1539,8 @@ impl IndexScheduler {
 fn delete_document_by_filter<'a>(
     wtxn: &mut RwTxn<'a, '_>,
     filter: &serde_json::Value,
+    indexer_config: &IndexerConfig,
+    must_stop_processing: MustStopProcessing,
     index: &'a Index,
 ) -> Result<u64> {
     let filter = Filter::from_json(filter)?;

@@ -1510,9 +1551,26 @@ fn delete_document_by_filter<'a>(
             }
             e => e.into(),
         })?;
-        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
-        delete_operation.delete_documents(&candidates);
-        delete_operation.execute().map(|result| result.deleted_documents)?
+
+        let config = IndexDocumentsConfig {
+            update_method: IndexDocumentsMethod::ReplaceDocuments,
+            ..Default::default()
+        };
+
+        let mut builder = milli::update::IndexDocuments::new(
+            wtxn,
+            index,
+            indexer_config,
+            config,
+            |indexing_step| debug!("update: {:?}", indexing_step),
+            || must_stop_processing.get(),
+        )?;
+
+        let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
+        builder = new_builder;
+
+        let _ = builder.execute()?;
+        count
     } else {
         0
     })
index-scheduler/src/error.rs

@@ -108,6 +108,8 @@ pub enum Error {
     TaskDeletionWithEmptyQuery,
     #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
     TaskCancelationWithEmptyQuery,
+    #[error("Aborted task")]
+    AbortedTask,

     #[error(transparent)]
     Dump(#[from] dump::Error),

@@ -175,6 +177,7 @@ impl Error {
             | Error::TaskNotFound(_)
             | Error::TaskDeletionWithEmptyQuery
             | Error::TaskCancelationWithEmptyQuery
+            | Error::AbortedTask
             | Error::Dump(_)
             | Error::Heed(_)
             | Error::Milli(_)

@@ -236,6 +239,9 @@ impl ErrorCode for Error {
             Error::TaskDatabaseUpdate(_) => Code::Internal,
             Error::CreateBatch(_) => Code::Internal,

+            // This one should never be seen by the end user
+            Error::AbortedTask => Code::Internal,
+
             #[cfg(test)]
             Error::PlannedFailure => Code::Internal,
         }
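The new `AbortedTask` variant pairs with the `must_stop_processing` checks added in `batch.rs`: long-running loops poll a shared flag and bail out with an error the scheduler can treat as an internal, user-invisible abort. A minimal sketch of that cooperative-cancellation pattern (the names mirror the scheduler's, but the types here are illustrative):

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

// Cloneable handle to a shared stop flag, like the scheduler's
// `MustStopProcessing`.
#[derive(Clone, Default)]
struct MustStopProcessing(Arc<AtomicBool>);

impl MustStopProcessing {
    fn get(&self) -> bool {
        self.0.load(Ordering::Relaxed)
    }
    fn must_stop(&self) {
        self.0.store(true, Ordering::Relaxed);
    }
}

#[derive(Debug)]
enum Error {
    AbortedTask,
}

fn dump_tasks(stop: &MustStopProcessing, tasks: &[u32]) -> Result<usize, Error> {
    let mut dumped = 0;
    for _task in tasks {
        // Check between units of work so a cancel request is honored quickly.
        if stop.get() {
            return Err(Error::AbortedTask);
        }
        dumped += 1;
    }
    Ok(dumped)
}

fn main() {
    let stop = MustStopProcessing::default();
    stop.must_stop();
    assert!(matches!(dump_tasks(&stop, &[1, 2, 3]), Err(Error::AbortedTask)));
}
```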
index-scheduler/src/features.rs

@@ -1,6 +1,8 @@
+use std::sync::{Arc, RwLock};
+
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RwTxn};

 use crate::error::FeatureNotEnabledError;
 use crate::Result;

@@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";

 #[derive(Clone)]
 pub(crate) struct FeatureData {
-    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
-    instance: InstanceTogglableFeatures,
+    persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
+    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
 }

 #[derive(Debug, Clone, Copy)]
 pub struct RoFeatures {
     runtime: RuntimeTogglableFeatures,
-    instance: InstanceTogglableFeatures,
 }

 impl RoFeatures {
-    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
-        let runtime = data.runtime_features(txn)?;
-        Ok(Self { runtime, instance: data.instance })
+    fn new(data: &FeatureData) -> Self {
+        let runtime = data.runtime_features();
+        Self { runtime }
     }

     pub fn runtime_features(&self) -> RuntimeTogglableFeatures {

@@ -43,13 +44,13 @@ impl RoFeatures {
     }

     pub fn check_metrics(&self) -> Result<()> {
-        if self.instance.metrics {
+        if self.runtime.metrics {
             Ok(())
         } else {
             Err(FeatureNotEnabledError {
                 disabled_action: "Getting metrics",
                 feature: "metrics",
-                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
+                issue_link: "https://github.com/meilisearch/product/discussions/625",
             }
             .into())
         }

@@ -67,15 +68,36 @@ impl RoFeatures {
             .into())
         }
     }
+
+    pub fn check_puffin(&self) -> Result<()> {
+        if self.runtime.export_puffin_reports {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError {
+                disabled_action: "Outputting Puffin reports to disk",
+                feature: "export puffin reports",
+                issue_link: "https://github.com/meilisearch/product/discussions/693",
+            }
+            .into())
+        }
+    }
 }

 impl FeatureData {
     pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
         let mut wtxn = env.write_txn()?;
-        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
+        let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
         wtxn.commit()?;

-        Ok(Self { runtime: runtime_features, instance: instance_features })
+        let txn = env.read_txn()?;
+        let persisted_features: RuntimeTogglableFeatures =
+            runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
+        let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
+            metrics: instance_features.metrics || persisted_features.metrics,
+            ..persisted_features
+        }));
+
+        Ok(Self { persisted: runtime_features_db, runtime })
     }

     pub fn put_runtime_features(

@@ -83,16 +105,25 @@ impl FeatureData {
         mut wtxn: RwTxn,
         features: RuntimeTogglableFeatures,
     ) -> Result<()> {
-        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
+        self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
         wtxn.commit()?;

+        // safe to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the thread is held -> it is only used for an assignment here.
+        let mut toggled_features = self.runtime.write().unwrap();
+        *toggled_features = features;
         Ok(())
     }

-    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
-        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
+    fn runtime_features(&self) -> RuntimeTogglableFeatures {
+        // sound to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the thread is held -> it is only used for copying the data here
+        *self.runtime.read().unwrap()
     }

-    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
-        RoFeatures::new(txn, self)
+    pub fn features(&self) -> RoFeatures {
+        RoFeatures::new(self)
     }
 }
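The refactor above moves feature checks off the LMDB database: flags are still persisted, but reads go through an in-memory `Arc<RwLock<_>>` that is seeded once at startup, so `check_metrics` and friends no longer need a read transaction. A minimal runnable sketch of that pattern, with illustrative types rather than the actual Meilisearch ones:

```rust
use std::sync::{Arc, RwLock};

// Small, copyable flag set standing in for `RuntimeTogglableFeatures`.
#[derive(Debug, Default, Clone, Copy)]
struct Features {
    metrics: bool,
    export_puffin_reports: bool,
}

#[derive(Clone)]
struct FeatureData {
    runtime: Arc<RwLock<Features>>,
}

impl FeatureData {
    fn new(persisted: Features, instance_metrics: bool) -> Self {
        // Instance-level flags (e.g. a CLI switch) are merged into the
        // persisted ones once, at startup.
        let merged = Features { metrics: instance_metrics || persisted.metrics, ..persisted };
        Self { runtime: Arc::new(RwLock::new(merged)) }
    }

    fn put(&self, features: Features) {
        // Writer: a short-lived lock, taken only for the assignment
        // (persisting to the database would happen alongside this).
        *self.runtime.write().unwrap() = features;
    }

    fn features(&self) -> Features {
        // Reader: copies the small struct out; no transaction needed.
        *self.runtime.read().unwrap()
    }
}

fn main() {
    let data = FeatureData::new(Features::default(), true);
    assert!(data.features().metrics);
    data.put(Features { export_puffin_reports: true, ..data.features() });
    assert!(data.features().export_puffin_reports);
}
```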
index-scheduler/src/insta_snapshot.rs

@@ -30,14 +30,17 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         index_mapper,
         features: _,
         max_number_of_tasks: _,
+        puffin_frame: _,
         wake_up: _,
         dumps_path: _,
         snapshots_path: _,
         auth_path: _,
         version_file_path: _,
+        webhook_url: _,
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
+        currently_updating_index: _,
     } = scheduler;

     let rtxn = env.read_txn().unwrap();
@ -27,12 +27,14 @@ mod index_mapper;
|
||||
mod insta_snapshot;
|
||||
mod lru;
|
||||
mod utils;
|
||||
mod uuid_codec;
|
||||
pub mod uuid_codec;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, Read};
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@ -44,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
|
||||
pub use error::Error;
|
||||
pub use features::RoFeatures;
|
||||
use file_store::FileStore;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
@ -51,7 +55,9 @@ use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use puffin::FrameView;
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
@ -167,8 +173,8 @@ impl ProcessingTasks {
|
||||
}
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
fn stop_processing(&mut self) {
|
||||
self.processing = RoaringBitmap::new();
|
||||
fn stop_processing(&mut self) -> RoaringBitmap {
|
||||
std::mem::take(&mut self.processing)
|
||||
}
|
||||
|
||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
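
The new `stop_processing` uses `std::mem::take` to swap the current bitmap for an empty one and return the old value, so the set of processed task ids can be forwarded to the webhook without cloning. A small self-contained sketch, assuming the `roaring` crate:

use roaring::RoaringBitmap;

struct ProcessingTasks {
    processing: RoaringBitmap,
}

impl ProcessingTasks {
    // Replaces the set with `RoaringBitmap::default()` and returns the old one.
    fn stop_processing(&mut self) -> RoaringBitmap {
        std::mem::take(&mut self.processing)
    }
}

fn main() {
    let mut tasks = ProcessingTasks { processing: RoaringBitmap::from_iter([1, 2, 3]) };
    let processed = tasks.stop_processing();
    assert_eq!(processed.len(), 3);
    assert!(tasks.processing.is_empty());
}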
@@ -238,6 +244,7 @@ pub struct IndexSchedulerOptions {
    pub snapshots_path: PathBuf,
    /// The path to the folder containing the dumps.
    pub dumps_path: PathBuf,
    pub webhook_url: Option<String>,
    /// The maximum size, in bytes, of the task index.
    pub task_db_size: usize,
    /// The size, in bytes, with which each Meilisearch index is opened the first time.
@@ -314,6 +321,12 @@ pub struct IndexScheduler {
    /// the finished tasks automatically.
    pub(crate) max_number_of_tasks: usize,

    /// The webhook URL we should send tasks to after processing every batch.
    pub(crate) webhook_url: Option<String>,

    /// A frame to output the indexation profiling files to disk.
    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,

    /// The path used to create the dumps.
    pub(crate) dumps_path: PathBuf,

@@ -326,6 +339,10 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

    /// A few types of long running batches of tasks that act on a single index set this field
    /// so that a handle to the index is available from other threads (search) in an optimized manner.
    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,

    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple parts of the scheduler.
@@ -364,10 +381,13 @@ impl IndexScheduler {
    wake_up: self.wake_up.clone(),
    autobatching_enabled: self.autobatching_enabled,
    max_number_of_tasks: self.max_number_of_tasks,
    puffin_frame: self.puffin_frame.clone(),
    snapshots_path: self.snapshots_path.clone(),
    dumps_path: self.dumps_path.clone(),
    auth_path: self.auth_path.clone(),
    version_file_path: self.version_file_path.clone(),
    webhook_url: self.webhook_url.clone(),
    currently_updating_index: self.currently_updating_index.clone(),
    #[cfg(test)]
    test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
    #[cfg(test)]
@@ -457,12 +477,15 @@ impl IndexScheduler {
    env,
    // we want to start the loop right away in case meilisearch was ctrl+C'd while processing things
    wake_up: Arc::new(SignalEvent::auto(true)),
    puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
    autobatching_enabled: options.autobatching_enabled,
    max_number_of_tasks: options.max_number_of_tasks,
    dumps_path: options.dumps_path,
    snapshots_path: options.snapshots_path,
    auth_path: options.auth_path,
    version_file_path: options.version_file_path,
    webhook_url: options.webhook_url,
    currently_updating_index: Arc::new(RwLock::new(None)),

    #[cfg(test)]
    test_breakpoint_sdr,
@@ -572,17 +595,46 @@ impl IndexScheduler {
    run.wake_up.wait();

    loop {
        let puffin_enabled = run.features().check_puffin().is_ok();
        puffin::set_scopes_on(puffin_enabled);
        puffin::GlobalProfiler::lock().new_frame();

        match run.tick() {
            Ok(TickOutcome::TickAgain(_)) => (),
            Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
            Err(e) => {
                log::error!("{}", e);
                log::error!("{e}");
                // Wait one second when an irrecoverable error occurs.
                if !e.is_recoverable() {
                    std::thread::sleep(Duration::from_secs(1));
                }
            }
        }

        // Let's write the previous frame to disk but only if
        // the user wanted to profile with puffin.
        if puffin_enabled {
            let mut frame_view = run.puffin_frame.lock();
            if !frame_view.is_empty() {
                let now = OffsetDateTime::now_utc();
                let mut file = match File::create(format!("{}.puffin", now)) {
                    Ok(file) => file,
                    Err(e) => {
                        log::error!("{e}");
                        continue;
                    }
                };
                if let Err(e) = frame_view.save_to_writer(&mut file) {
                    log::error!("{e}");
                }
                if let Err(e) = file.sync_all() {
                    log::error!("{e}");
                }
                // We erase this frame view as it is no longer useful. We want to
                // measure the new frames now that we exported the previous ones.
                *frame_view = FrameView::default();
            }
        }
    }
})
.unwrap();
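
Note that the loop re-evaluates the `exportPuffinReports` feature on every iteration, so profiling can be toggled at runtime without restarting the scheduler. A trimmed sketch of that shape, assuming the puffin 0.16 API used above (the tick body is elided):

fn run_ticks(iterations: usize, puffin_enabled: impl Fn() -> bool) {
    for _ in 0..iterations {
        // Checked each round, so flipping the runtime feature takes effect
        // on the next iteration.
        let enabled = puffin_enabled();
        puffin::set_scopes_on(enabled);
        puffin::GlobalProfiler::lock().new_frame();
        // ... run one scheduling tick here ...
    }
}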
@@ -616,6 +668,13 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
        if let Some((current_name, current_index)) =
            self.currently_updating_index.read().unwrap().as_ref()
        {
            if current_name == name {
                return Ok(current_index.clone());
            }
        }
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index(&rtxn, name)
    }
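
`index` first consults `currently_updating_index`, so a search that targets the index currently being indexed reuses the already-open handle instead of going through the index mapper. A reduced sketch of that fast path, with a `String` standing in for the real `Index` handle:

use std::sync::{Arc, RwLock};

struct Scheduler {
    // (index name, open handle) published by long-running indexing batches.
    currently_updating_index: Arc<RwLock<Option<(String, String)>>>,
}

impl Scheduler {
    fn index(&self, name: &str) -> Option<String> {
        if let Some((current_name, handle)) =
            self.currently_updating_index.read().unwrap().as_ref()
        {
            if current_name == name {
                return Some(handle.clone());
            }
        }
        None // the real code falls back to `index_mapper.index(&rtxn, name)`
    }
}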
@@ -1062,8 +1121,6 @@ impl IndexScheduler {
    self.breakpoint(Breakpoint::Start);
}

puffin::GlobalProfiler::lock().new_frame();

self.cleanup_task_queue()?;

let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1099,6 +1156,9 @@ impl IndexScheduler {
    handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};

// Reset the currently updating index to relinquish the index handle
*self.currently_updating_index.write().unwrap() = None;

#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

@@ -1133,7 +1193,8 @@ impl IndexScheduler {
    // If we have an abortion error we must stop the tick here and re-schedule tasks.
    Err(Error::Milli(milli::Error::InternalError(
        milli::InternalError::AbortedIndexation,
    ))) => {
    )))
    | Err(Error::AbortedTask) => {
        #[cfg(test)]
        self.breakpoint(Breakpoint::AbortedIndexation);
        wtxn.abort().map_err(Error::HeedTransaction)?;
@@ -1189,19 +1250,92 @@ impl IndexScheduler {
        }
    }

    self.processing_tasks.write().unwrap().stop_processing();
    let processed = self.processing_tasks.write().unwrap().stop_processing();

    #[cfg(test)]
    self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;

    wtxn.commit().map_err(Error::HeedTransaction)?;

    // We shouldn't crash the tick function if we can't send data to the webhook.
    let _ = self.notify_webhook(&processed);

    #[cfg(test)]
    self.breakpoint(Breakpoint::AfterProcessing);

    Ok(TickOutcome::TickAgain(processed_tasks))
}

/// Once the task changes have been committed, we must send all the updated tasks to our webhook, if one is configured.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
    if let Some(ref url) = self.webhook_url {
        struct TaskReader<'a, 'b> {
            rtxn: &'a RoTxn<'a>,
            index_scheduler: &'a IndexScheduler,
            tasks: &'b mut roaring::bitmap::Iter<'b>,
            buffer: Vec<u8>,
            written: usize,
        }

        impl<'a, 'b> Read for TaskReader<'a, 'b> {
            fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
                if self.buffer.is_empty() {
                    match self.tasks.next() {
                        None => return Ok(0),
                        Some(task_id) => {
                            let task = self
                                .index_scheduler
                                .get_task(self.rtxn, task_id)
                                .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        Error::CorruptedTaskQueue,
                                    )
                                })?;

                            serde_json::to_writer(
                                &mut self.buffer,
                                &TaskView::from_task(&task),
                            )?;
                            self.buffer.push(b'\n');
                        }
                    }
                }

                let mut to_write = &self.buffer[self.written..];
                let wrote = io::copy(&mut to_write, &mut buf)?;
                self.written += wrote as usize;

                // we wrote everything and must refresh our buffer on the next call
                if self.written == self.buffer.len() {
                    self.written = 0;
                    self.buffer.clear();
                }

                Ok(wrote as usize)
            }
        }

        let rtxn = self.env.read_txn()?;

        let task_reader = TaskReader {
            rtxn: &rtxn,
            index_scheduler: self,
            tasks: &mut updated.into_iter(),
            buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
            written: 0,
        };

        let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
        if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
            log::error!("While sending data to the webhook: {e}");
        }
    }

    Ok(())
}
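
`TaskReader` serializes one task per `read` call, producing an ndjson stream that `GzEncoder` compresses lazily as `ureq` pulls from it, so the whole payload never sits in memory at once. A self-contained sketch of the same pull-based compression, assuming the `flate2` crate and an in-memory stand-in for the task stream:

use std::io::{BufReader, Read};

use flate2::bufread::GzEncoder;
use flate2::Compression;

fn gzip_ndjson(lines: &[&str]) -> std::io::Result<Vec<u8>> {
    // Build the ndjson payload: one JSON document per line.
    let ndjson: Vec<u8> =
        lines.iter().flat_map(|l| l.bytes().chain(std::iter::once(b'\n'))).collect();

    // The encoder compresses only as the consumer reads; an HTTP client that
    // accepts `impl Read` (like `ureq`'s `send`) streams it the same way.
    let mut encoder = GzEncoder::new(BufReader::new(&ndjson[..]), Compression::default());
    let mut compressed = Vec::new();
    encoder.read_to_end(&mut compressed)?;
    Ok(compressed)
}

fn main() -> std::io::Result<()> {
    let body = gzip_ndjson(&[r#"{"uid":0,"status":"succeeded"}"#])?;
    println!("compressed ndjson is {} bytes", body.len());
    Ok(())
}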
/// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> {
    let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1259,9 +1393,8 @@ impl IndexScheduler {
    Ok(IndexStats { is_indexing, inner_stats: index_stats })
}

pub fn features(&self) -> Result<RoFeatures> {
    let rtxn = self.read_txn()?;
    self.features.features(rtxn)
pub fn features(&self) -> RoFeatures {
    self.features.features()
}

pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@@ -1582,6 +1715,7 @@ mod tests {
    indexes_path: tempdir.path().join("indexes"),
    snapshots_path: tempdir.path().join("snapshots"),
    dumps_path: tempdir.path().join("dumps"),
    webhook_url: None,
    task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
    index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
    enable_mdb_writemap: false,
@@ -4290,4 +4424,26 @@ mod tests {
    }
    "###);
}

#[test]
fn cancel_processing_dump() {
    let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

    let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
    let dump_cancellation = KindWithContent::TaskCancelation {
        query: "cancel dump".to_owned(),
        tasks: RoaringBitmap::from_iter([0]),
    };
    let _ = index_scheduler.register(dump_creation).unwrap();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
    handle.advance_till([Start, BatchCreated, InsideProcessBatch]);

    let _ = index_scheduler.register(dump_cancellation).unwrap();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");

    snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");

    handle.advance_one_successful_batch();
    snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}
@@ -0,0 +1,35 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:
1 [0,]

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -0,0 +1,38 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]

# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

@@ -324,7 +324,6 @@ impl ErrorCode for milli::Error {
    UserError::SerdeJson(_)
    | UserError::InvalidLmdbOpenOptions
    | UserError::DocumentLimitReached
    | UserError::AccessingSoftDeletedDocument { .. }
    | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
    UserError::InvalidStoreFile => Code::InvalidStoreFile,
    UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

@@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
    pub score_details: bool,
    pub vector_store: bool,
    pub metrics: bool,
    pub export_puffin_reports: bool,
}

#[derive(Default, Debug, Clone, Copy)]

@@ -9,6 +9,7 @@ pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};
meilisearch-types/src/task_view.rs (new file, 139 lines)
@@ -0,0 +1,139 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};

use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
    pub uid: TaskId,
    #[serde(default)]
    pub index_uid: Option<String>,
    pub status: Status,
    #[serde(rename = "type")]
    pub kind: Kind,
    pub canceled_by: Option<TaskId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<DetailsView>,
    pub error: Option<ResponseError>,
    #[serde(serialize_with = "serialize_duration", default)]
    pub duration: Option<Duration>,
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339::option", default)]
    pub started_at: Option<OffsetDateTime>,
    #[serde(with = "time::serde::rfc3339::option", default)]
    pub finished_at: Option<OffsetDateTime>,
}

impl TaskView {
    pub fn from_task(task: &Task) -> TaskView {
        TaskView {
            uid: task.uid,
            index_uid: task.index_uid().map(ToOwned::to_owned),
            status: task.status,
            kind: task.kind.as_kind(),
            canceled_by: task.canceled_by,
            details: task.details.clone().map(DetailsView::from),
            error: task.error.clone(),
            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
        }
    }
}

#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub received_documents: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub indexed_documents: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_key: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provided_ids: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_documents: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_tasks: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub canceled_tasks: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_tasks: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub original_filter: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dump_uid: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(flatten)]
    pub settings: Option<Box<Settings<Unchecked>>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub swaps: Option<Vec<IndexSwap>>,
}

impl From<Details> for DetailsView {
    fn from(details: Details) -> Self {
        match details {
            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
                DetailsView {
                    received_documents: Some(received_documents),
                    indexed_documents: Some(indexed_documents),
                    ..DetailsView::default()
                }
            }
            Details::SettingsUpdate { settings } => {
                DetailsView { settings: Some(settings), ..DetailsView::default() }
            }
            Details::IndexInfo { primary_key } => {
                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
            }
            Details::DocumentDeletion {
                provided_ids: received_document_ids,
                deleted_documents,
            } => DetailsView {
                provided_ids: Some(received_document_ids),
                deleted_documents: Some(deleted_documents),
                original_filter: Some(None),
                ..DetailsView::default()
            },
            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
                DetailsView {
                    provided_ids: Some(0),
                    original_filter: Some(Some(original_filter)),
                    deleted_documents: Some(deleted_documents),
                    ..DetailsView::default()
                }
            }
            Details::ClearAll { deleted_documents } => {
                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
            }
            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    canceled_tasks: Some(canceled_tasks),
                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    deleted_tasks: Some(deleted_tasks),
                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
            Details::Dump { dump_uid } => {
                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
            }
            Details::IndexSwap { swaps } => {
                DetailsView { swaps: Some(swaps), ..Default::default() }
            }
        }
    }
}
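
`DetailsView` leans on the `Option<Option<T>>` plus `skip_serializing_if` pattern: the outer `Option` decides whether the key appears at all, the inner one whether its value is `null`. A minimal demonstration of that serde behavior (serde and serde_json assumed as dependencies):

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct View {
    // None => key omitted; Some(None) => "dumpUid": null; Some(Some(v)) => value.
    #[serde(skip_serializing_if = "Option::is_none")]
    dump_uid: Option<Option<String>>,
}

fn main() {
    let absent = View { dump_uid: None };
    let null = View { dump_uid: Some(None) };
    let set = View { dump_uid: Some(Some("20231010-1200".into())) };
    assert_eq!(serde_json::to_string(&absent).unwrap(), "{}");
    assert_eq!(serde_json::to_string(&null).unwrap(), r#"{"dumpUid":null}"#);
    assert_eq!(serde_json::to_string(&set).unwrap(), r#"{"dumpUid":"20231010-1200"}"#);
}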
@@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@@ -105,6 +104,7 @@ walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }

[dev-dependencies]
actix-rt = "2.8.0"
@@ -135,7 +135,6 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
    "actix-web-static-files",
    "static-files",
@@ -152,6 +151,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]

[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

@@ -288,6 +288,7 @@ impl From<Opt> for Infos {
    http_addr,
    master_key: _,
    env,
    task_webhook_url: _,
    max_index_size: _,
    max_task_db_size: _,
    http_payload_size_limit,

@@ -114,10 +114,7 @@ pub fn create_app(
    .configure(routes::configure)
    .configure(|s| dashboard(s, enable_dashboard));

let app = app.wrap(actix_web::middleware::Condition::new(
    opt.experimental_enable_metrics,
    middleware::RouteMetrics,
));
let app = app.wrap(middleware::RouteMetrics);
app.wrap(
    Cors::default()
        .send_wildcard()
@@ -231,6 +228,7 @@ fn open_or_create_database_unchecked(
    indexes_path: opt.db_path.join("indexes"),
    snapshots_path: opt.snapshot_dir.clone(),
    dumps_path: opt.dump_dir.clone(),
    webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
    task_db_size: opt.max_task_db_size.get_bytes() as usize,
    index_base_map_size: opt.max_index_size.get_bytes() as usize,
    enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
@@ -365,7 +363,7 @@ fn import_dump(
    update_method: IndexDocumentsMethod::ReplaceDocuments,
    ..Default::default()
},
|indexing_step| log::debug!("update: {:?}", indexing_step),
|indexing_step| log::trace!("update: {:?}", indexing_step),
|| false,
)?;

@@ -400,6 +398,7 @@ pub fn configure_data(
.app_data(web::Data::from(analytics))
.app_data(
    web::JsonConfig::default()
        .limit(http_payload_size_limit)
        .content_type(|mime| mime == mime::APPLICATION_JSON)
        .error_handler(|err, req: &HttpRequest| match err {
            JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {

@@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

    #[cfg(feature = "profile-with-puffin")]
    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"

@@ -3,8 +3,10 @@
use std::future::{ready, Ready};

use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;

pub struct RouteMetrics;
@@ -47,19 +49,27 @@ where

fn call(&self, req: ServiceRequest) -> Self::Future {
    let mut histogram_timer: Option<HistogramTimer> = None;
    let request_path = req.path();
    let is_registered_resource = req.resource_map().has_resource(request_path);
    if is_registered_resource {
        let request_method = req.method().to_string();
        histogram_timer = Some(
            crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                .with_label_values(&[&request_method, request_path])
                .start_timer(),
        );
        crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
            .with_label_values(&[&request_method, request_path])
            .inc();
    }

    // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
    // also, the tests will fail if this is not present.
    let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
    let features = index_scheduler.features();

    if features.check_metrics().is_ok() {
        let request_path = req.path();
        let is_registered_resource = req.resource_map().has_resource(request_path);
        if is_registered_resource {
            let request_method = req.method().to_string();
            histogram_timer = Some(
                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                    .with_label_values(&[&request_method, request_path])
                    .start_timer(),
            );
            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                .with_label_values(&[&request_method, request_path])
                .inc();
        }
    };

    let fut = self.service.call(req);
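
With `Condition` removed from `create_app`, the middleware is always installed and the metrics check moves inside `call`, so the `metrics` feature can be flipped per request at runtime. A reduced sketch of the gating, assuming the `prometheus` and `once_cell` crates; the counter name and helper function are illustrative:

use once_cell::sync::Lazy;
use prometheus::{opts, register_int_counter_vec, IntCounterVec};

static HTTP_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        opts!("http_requests_total", "Number of HTTP requests"),
        &["method", "path"]
    )
    .unwrap()
});

// Checked on every request instead of once at startup, mirroring the
// middleware above.
fn record_request(metrics_enabled: bool, method: &str, path: &str) {
    if metrics_enabled {
        HTTP_REQUESTS_TOTAL.with_label_values(&[method, path]).inc();
    }
}

fn main() {
    record_request(false, "GET", "/health"); // ignored: feature is off
    record_request(true, "GET", "/health"); // counted
}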
@@ -21,6 +21,7 @@ use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
use url::Url;

const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

@@ -28,6 +29,7 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
#[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -154,6 +156,10 @@ pub struct Opt {
    #[serde(default = "default_env")]
    pub env: String,

    /// Called whenever a task finishes so a third party can be notified.
    #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
    pub task_webhook_url: Option<Url>,

    /// Deactivates Meilisearch's built-in telemetry when provided.
    ///
    /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -368,6 +374,7 @@ impl Opt {
    http_addr,
    master_key,
    env,
    task_webhook_url,
    max_index_size: _,
    max_task_db_size: _,
    http_payload_size_limit,
@@ -401,6 +408,10 @@ impl Opt {
        export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
    }
    export_to_env_if_not_present(MEILI_ENV, env);
    if let Some(task_webhook_url) = task_webhook_url {
        export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
    }

    #[cfg(feature = "analytics")]
    {
        export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@@ -29,12 +29,12 @@ async fn get_features(
    >,
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let features = index_scheduler.features()?;
) -> HttpResponse {
    let features = index_scheduler.features();

    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
    debug!("returns: {:?}", features.runtime_features());
    Ok(HttpResponse::Ok().json(features.runtime_features()))
    HttpResponse::Ok().json(features.runtime_features())
}

#[derive(Debug, Deserr)]
@@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
    pub score_details: Option<bool>,
    #[deserr(default)]
    pub vector_store: Option<bool>,
    #[deserr(default)]
    pub metrics: Option<bool>,
    #[deserr(default)]
    pub export_puffin_reports: Option<bool>,
}

async fn patch_features(
@@ -55,26 +59,36 @@ async fn patch_features(
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let features = index_scheduler.features()?;
    let features = index_scheduler.features();

    let old_features = features.runtime_features();

    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
        score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
        export_puffin_reports: new_features
            .0
            .export_puffin_reports
            .unwrap_or(old_features.export_puffin_reports),
    };

    // explicitly destructure for analytics rather than using the `Serialize` implementation, because
    // it renames to camelCase, which we don't want for analytics.
    // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
        new_features;
    let meilisearch_types::features::RuntimeTogglableFeatures {
        score_details,
        vector_store,
        metrics,
        export_puffin_reports,
    } = new_features;

    analytics.publish(
        "Experimental features Updated".to_string(),
        json!({
            "score_details": score_details,
            "vector_store": vector_store,
            "metrics": metrics,
            "export_puffin_reports": export_puffin_reports,
        }),
        Some(&req),
    );
@@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let internal_id = index
        .external_documents_ids(&txn)?
        .get(doc_id.as_bytes())
        .external_documents_ids()
        .get(&txn, doc_id)?
        .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;

    let document = index

@@ -68,7 +68,7 @@ pub async fn search(
    }

    let index = index_scheduler.index(&index_uid)?;
    let features = index_scheduler.features()?;
    let features = index_scheduler.features();
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name, features)
    })

@@ -157,7 +157,7 @@ pub async fn search_with_url_query(
    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
    let features = index_scheduler.features()?;
    let features = index_scheduler.features();
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
@@ -192,7 +192,7 @@ pub async fn search_with_post(

    let index = index_scheduler.index(&index_uid)?;

    let features = index_scheduler.features()?;
    let features = index_scheduler.features();
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {

@@ -19,7 +19,7 @@ pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
    index_scheduler.features()?.check_metrics()?;
    index_scheduler.features().check_metrics()?;
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {
        let mut error = ResponseError::from(AuthenticationError::InvalidToken);

@@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
    let queries = params.into_inner().queries;

    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
    let features = index_scheduler.features()?;
    let features = index_scheduler.features();

    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code

@@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::tasks::{
    serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
@@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
        .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
        .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
    pub uid: TaskId,
    #[serde(default)]
    pub index_uid: Option<String>,
    pub status: Status,
    #[serde(rename = "type")]
    pub kind: Kind,
    pub canceled_by: Option<TaskId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<DetailsView>,
    pub error: Option<ResponseError>,
    #[serde(serialize_with = "serialize_duration", default)]
    pub duration: Option<Duration>,
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339::option", default)]
    pub started_at: Option<OffsetDateTime>,
    #[serde(with = "time::serde::rfc3339::option", default)]
    pub finished_at: Option<OffsetDateTime>,
}

impl TaskView {
    pub fn from_task(task: &Task) -> TaskView {
        TaskView {
            uid: task.uid,
            index_uid: task.index_uid().map(ToOwned::to_owned),
            status: task.status,
            kind: task.kind.as_kind(),
            canceled_by: task.canceled_by,
            details: task.details.clone().map(DetailsView::from),
            error: task.error.clone(),
            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
        }
    }
}

#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub received_documents: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub indexed_documents: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_key: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provided_ids: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_documents: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_tasks: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub canceled_tasks: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_tasks: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub original_filter: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dump_uid: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(flatten)]
    pub settings: Option<Box<Settings<Unchecked>>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub swaps: Option<Vec<IndexSwap>>,
}

impl From<Details> for DetailsView {
    fn from(details: Details) -> Self {
        match details {
            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
                DetailsView {
                    received_documents: Some(received_documents),
                    indexed_documents: Some(indexed_documents),
                    ..DetailsView::default()
                }
            }
            Details::SettingsUpdate { settings } => {
                DetailsView { settings: Some(settings), ..DetailsView::default() }
            }
            Details::IndexInfo { primary_key } => {
                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
            }
            Details::DocumentDeletion {
                provided_ids: received_document_ids,
                deleted_documents,
            } => DetailsView {
                provided_ids: Some(received_document_ids),
                deleted_documents: Some(deleted_documents),
                original_filter: Some(None),
                ..DetailsView::default()
            },
            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
                DetailsView {
                    provided_ids: Some(0),
                    original_filter: Some(Some(original_filter)),
                    deleted_documents: Some(deleted_documents),
                    ..DetailsView::default()
                }
            }
            Details::ClearAll { deleted_documents } => {
                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
            }
            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    canceled_tasks: Some(canceled_tasks),
                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    deleted_tasks: Some(deleted_tasks),
                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
            Details::Dump { dump_uid } => {
                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
            }
            Details::IndexSwap { swaps } => {
                DetailsView { swaps: Some(swaps), ..Default::default() }
            }
        }
    }
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
@@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};

use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime};

use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;

pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@@ -195,7 +197,9 @@ async fn access_authorized_master_key() {

#[actix_rt::test]
async fn access_authorized_restricted_index() {
    let mut server = Server::new_auth().await;
    let dir = TempDir::new().unwrap();
    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
    let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
    for ((method, route), actions) in AUTHORIZATIONS.iter() {
        for action in actions {
            // create a new API key letting only the needed action.

@@ -5,9 +5,11 @@ pub mod service;

use std::fmt::{self, Display};

#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};

#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]

@@ -202,6 +202,10 @@ impl Server {
    pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
        self.service.patch("/experimental-features", value).await
    }

    pub async fn get_metrics(&self) -> (Value, StatusCode) {
        self.service.get("/metrics").await
    }
}

pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
        skip_index_budget: true,
        ..Parser::parse_from(None as Option<&str>)
    },
    experimental_enable_metrics: true,
    experimental_enable_metrics: false,
    ..Parser::parse_from(None as Option<&str>)
}
}
@@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
    "canceledBy": null,
    "details": {
        "providedIds": 0,
        "deletedDocuments": 4,
        "deletedDocuments": 2,
        "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
    },
    "error": null,

@@ -1,4 +1,7 @@
use crate::common::Server;
use meilisearch::Opt;
use tempfile::TempDir;

use crate::common::{default_settings, Server};
use crate::json;

/// Feature name to test against.
@@ -16,7 +19,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": false
      "vectorStore": false,
      "metrics": false,
      "exportPuffinReports": false
    }
    "###);

@@ -26,7 +31,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
    }
    "###);

@@ -36,7 +43,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
    }
    "###);

@@ -47,7 +56,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
    }
    "###);

@@ -58,11 +69,73 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
    }
    "###);
}

#[actix_rt::test]
async fn experimental_feature_metrics() {
    // instance flag for metrics enables metrics at startup
    let dir = TempDir::new().unwrap();
    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
    let server = Server::new_with_options(enable_metrics).await.unwrap();

    let (response, code) = server.get_features().await;

    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": false,
      "metrics": true,
      "exportPuffinReports": false
    }
    "###);

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");

    // metrics are not returned in json format
    // so the test server will return null
    meili_snap::snapshot!(response, @"null");

    // disabling metrics results in invalid request
    let (response, code) = server.set_features(json!({"metrics": false})).await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response["metrics"], @"false");

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
      "code": "feature_not_enabled",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
    }
    "###);

    // enabling metrics via HTTP results in valid request
    let (response, code) = server.set_features(json!({"metrics": true})).await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response["metrics"], @"true");

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response, @"null");

    // startup without flag respects persisted metrics value
    let disable_metrics =
        Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
    let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
    let (response, code) = server_no_flag.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response, @"null");
}

#[actix_rt::test]
async fn errors() {
    let server = Server::new().await;
@@ -73,7 +146,7 @@ async fn errors() {
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
      "code": "bad_request",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#bad_request"
meilisearch/tests/search/distinct.rs (new file, 241 lines)
@@ -0,0 +1,241 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;

use crate::common::{Server, Value};
use crate::json;

pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "id": 1,
            "description": "Leather Jacket",
            "brand": "Lee Jeans",
            "product_id": "123456",
            "color": "Brown"
        },
        {
            "id": 2,
            "description": "Leather Jacket",
            "brand": "Lee Jeans",
            "product_id": "123456",
            "color": "Black"
        },
        {
            "id": 3,
            "description": "Leather Jacket",
            "brand": "Lee Jeans",
            "product_id": "123456",
            "color": "Blue"
        },
        {
            "id": 4,
            "description": "T-Shirt",
            "brand": "Nike",
            "product_id": "789012",
            "color": "Red"
        },
        {
            "id": 5,
            "description": "T-Shirt",
            "brand": "Nike",
            "product_id": "789012",
            "color": "Blue"
        },
        {
            "id": 6,
            "description": "Running Shoes",
            "brand": "Adidas",
            "product_id": "456789",
            "color": "Black"
        },
        {
            "id": 7,
            "description": "Running Shoes",
            "brand": "Adidas",
            "product_id": "456789",
            "color": "White"
        },
        {
            "id": 8,
            "description": "Hoodie",
            "brand": "Puma",
            "product_id": "987654",
            "color": "Gray"
        },
        {
            "id": 9,
            "description": "Sweater",
            "brand": "Gap",
            "product_id": "234567",
            "color": "Green"
        },
        {
            "id": 10,
            "description": "Sweater",
            "brand": "Gap",
            "product_id": "234567",
            "color": "Red"
        },
        {
            "id": 11,
            "description": "Sweater",
            "brand": "Gap",
            "product_id": "234567",
            "color": "Blue"
        },
        {
            "id": 12,
            "description": "Jeans",
            "brand": "Levi's",
            "product_id": "345678",
            "color": "Indigo"
        },
        {
            "id": 13,
            "description": "Jeans",
            "brand": "Levi's",
            "product_id": "345678",
            "color": "Black"
        },
        {
            "id": 14,
            "description": "Jeans",
            "brand": "Levi's",
            "product_id": "345678",
            "color": "Stone Wash"
        }
    ])
});

pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";

/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

    fn get_hits(response: &Value) -> Vec<&str> {
        let hits_array = response["hits"].as_array().unwrap();
        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
    }

    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
    snapshot!(response["estimatedTotalHits"] , @"11");

    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
    snapshot!(response["estimatedTotalHits"], @"10");

    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
    snapshot!(response["estimatedTotalHits"], @"6");

    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"1");
    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
    snapshot!(response["estimatedTotalHits"], @"6");

    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
    snapshot!(format!("{:?}", hits), @r#"[]"#);
    snapshot!(response["estimatedTotalHits"], @"6");

    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
    snapshot!(format!("{:?}", hits), @r#"[]"#);
    snapshot!(response["estimatedTotalHits"], @"6");
}

/// testing: https://github.com/meilisearch/meilisearch/issues/4130
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

    fn get_hits(response: &Value) -> Vec<&str> {
        let hits_array = response["hits"].as_array().unwrap();
        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
    }

    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
    snapshot!(format!("{:?}", hits), @r#"[]"#);
    snapshot!(response["page"], @"0");
    snapshot!(response["totalPages"], @"3");
    snapshot!(response["totalHits"], @"6");

    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
    snapshot!(response["page"], @"1");
    snapshot!(response["totalPages"], @"3");
    snapshot!(response["totalHits"], @"6");

    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
    snapshot!(response["page"], @"2");
    snapshot!(response["totalPages"], @"3");
    snapshot!(response["totalHits"], @"6");

    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
    snapshot!(response["page"], @"3");
    snapshot!(response["totalPages"], @"3");
    snapshot!(response["totalHits"], @"6");

    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
    snapshot!(format!("{:?}", hits), @r#"[]"#);
    snapshot!(response["page"], @"4");
    snapshot!(response["totalPages"], @"3");
    snapshot!(response["totalHits"], @"6");

    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"3");
    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
    snapshot!(response["page"], @"2");
    snapshot!(response["totalPages"], @"2");
    snapshot!(response["totalHits"], @"6");
}
@@ -1,6 +1,7 @@
 // This module contains all the tests concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.

+mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;
@@ -816,7 +817,7 @@ async fn experimental_feature_score_details() {
             },
             "proximity": {
                 "order": 2,
-                "score": 0.875
+                "score": 0.75
             },
             "attribute": {
                 "order": 3,
@@ -1,4 +1,5 @@
 mod errors;
+mod webhook;

 use meili_snap::insta::assert_json_snapshot;
 use time::format_description::well_known::Rfc3339;
meilisearch/tests/tasks/webhook.rs (new file, 123 lines)
@@ -0,0 +1,123 @@
//! To test the webhook, we need to spawn a new server with a URL listening for
//! post requests. The webhook handle starts a server and forwards all the
//! received requests into a channel for you to handle.

use std::sync::Arc;

use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::web::{Bytes, Data};
use actix_web::{post, App, HttpResponse, HttpServer};
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tokio::sync::mpsc;
use url::Url;

use crate::common::{default_settings, Server};
use crate::json;

#[post("/")]
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
    let body = body.to_vec();
    sender.send(body).unwrap();
    HttpResponse::Ok().into()
}

fn create_app(
    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
) -> actix_web::App<
    impl ServiceFactory<
        actix_web::dev::ServiceRequest,
        Config = (),
        Response = ServiceResponse<impl MessageBody>,
        Error = actix_web::Error,
        InitError = (),
    >,
> {
    App::new().service(forward_body).app_data(Data::from(sender))
}

struct WebhookHandle {
    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
    pub url: String,
    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
}

async fn create_webhook_server() -> WebhookHandle {
    let mut log_builder = env_logger::Builder::new();
    log_builder.parse_filters("info");
    log_builder.init();

    let (sender, receiver) = mpsc::unbounded_channel();
    let sender = Arc::new(sender);

    // By listening on port 0, the system will give us any available port.
    let server =
        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
    let (ip, scheme) = server.addrs_with_scheme()[0];
    let url = format!("{scheme}://{ip}/");

    let server_handle = tokio::spawn(server.run());
    WebhookHandle { server_handle, url, receiver }
}

#[actix_web::test]
async fn test_basic_webhook() {
    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;

    let db_path = tempfile::tempdir().unwrap();
    let server = Server::new_with_options(Opt {
        task_webhook_url: Some(Url::parse(&url).unwrap()),
        ..default_settings(db_path.path())
    })
    .await
    .unwrap();

    let index = server.index("tamo");
    // May be flaky: we're relying on the fact that while the first document addition is
    // processed, the other operations will be received and batched together. If that
    // doesn't happen it's not a problem: the rest of the test doesn't assume anything
    // about the number of tasks per batch.
    for i in 0..5 {
        let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
    }

    let mut nb_tasks = 0;
    while let Some(payload) = receiver.recv().await {
        let payload = String::from_utf8(payload).unwrap();
        let jsonl = payload.split('\n');
        for json in jsonl {
            if json.is_empty() {
                break; // we reached EOF
            }
            nb_tasks += 1;
            let json: serde_json::Value = serde_json::from_str(json).unwrap();
            snapshot!(
                json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
                @r###"
            {
              "uid": "[uid]",
              "indexUid": "tamo",
              "status": "succeeded",
              "type": "documentAdditionOrUpdate",
              "canceledBy": null,
              "details": {
                "receivedDocuments": 1,
                "indexedDocuments": 1
              },
              "error": null,
              "duration": "[duration]",
              "enqueuedAt": "[date]",
              "startedAt": "[date]",
              "finishedAt": "[date]"
            }
            "###);
        }
        if nb_tasks == 5 {
            break;
        }
    }

    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");

    server_handle.abort();
}
meilitool/Cargo.toml (new file, 19 lines)
@@ -0,0 +1,19 @@
[package]
name = "meilitool"
description = "A CLI to edit a Meilisearch database from the command line"
version.workspace = true
authors.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.75"
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.30", features = ["formatting"] }
uuid = { version = "1.5.0", features = ["v4"], default-features = false }
meilitool/src/main.rs (new file, 312 lines)
@@ -0,0 +1,312 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::path::PathBuf;

use anyhow::Context;
use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::check_version_file;
use meilisearch_types::Index;
use time::macros::format_description;
use time::OffsetDateTime;
use uuid_codec::UuidCodec;

mod uuid_codec;

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// The database path where Meilisearch is running.
    #[arg(long, default_value = "data.ms/")]
    db_path: PathBuf,

    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// Clears the task queue, making it empty.
    ///
    /// This command can be safely executed even if Meilisearch is running and processing tasks.
    /// Once the task queue is empty you can restart Meilisearch and no task should be visible,
    /// not even the ones that were processing. However, it's highly likely that you will see the
    /// previously processing tasks in the queue again, with an associated internal error message.
    ClearTaskQueue,

    /// Exports a dump from the Meilisearch database.
    ///
    /// Make sure to run this command when Meilisearch is not running, or running but not processing tasks.
    /// If tasks are being processed while a dump is being exported there are chances for the dump to be
    /// malformed with missing tasks.
    ///
    /// TODO Verify this claim or make sure it cannot happen and we can export dumps
    /// without caring about killing Meilisearch first!
    ExportADump {
        /// The directory in which the dump will be created.
        #[arg(long, default_value = "dumps/")]
        dump_dir: PathBuf,

        /// Skip dumping the enqueued or processing tasks.
        ///
        /// Can be useful when there are a lot of them and it is not particularly useful
        /// to keep them. Note that only the enqueued tasks take up space, so skipping
        /// the processed ones is not particularly interesting.
        #[arg(long)]
        skip_enqueued_tasks: bool,
    },
}
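A hedged sketch of how these subcommands would be invoked from a shell, assuming clap's default kebab-case naming derived from the definitions above (the invocations are illustrative, not taken from the diff):

    meilitool --db-path data.ms/ clear-task-queue
    meilitool --db-path data.ms/ export-a-dump --dump-dir dumps/ --skip-enqueued-tasks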

fn main() -> anyhow::Result<()> {
    let Cli { db_path, command } = Cli::parse();

    check_version_file(&db_path).context("While checking the version file")?;

    match command {
        Command::ClearTaskQueue => clear_task_queue(db_path),
        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
        }
    }
}

/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
    let path = db_path.join("tasks");
    let env = EnvOpenOptions::new()
        .max_dbs(100)
        .open(&path)
        .with_context(|| format!("While trying to open {:?}", path.display()))?;

    eprintln!("Deleting tasks from the database...");

    let mut wtxn = env.write_txn()?;
    let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
    let total = all_tasks.len(&wtxn)?;
    let status = try_opening_poly_database(&env, &wtxn, "status")?;
    let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
    let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
    let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
    let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
    let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
    let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;

    try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
    try_clearing_poly_database(&mut wtxn, status, "status")?;
    try_clearing_poly_database(&mut wtxn, kind, "kind")?;
    try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
    try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
    try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
    try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
    try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;

    wtxn.commit().context("While committing the transaction")?;

    eprintln!("Successfully deleted {total} tasks from the tasks database!");
    eprintln!("Deleting the content files from disk...");

    let mut count = 0usize;
    let update_files = db_path.join("update_files");
    let entries = read_dir(&update_files).with_context(|| {
        format!("While trying to read the content of {:?}", update_files.display())
    })?;
    for result in entries {
        match result {
            Ok(ent) => match remove_file(ent.path()) {
                Ok(_) => count += 1,
                Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
            },
            Err(e) => {
                eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
            }
        }
    }

    eprintln!("Successfully deleted {count} content files from disk!");

    Ok(())
}

fn try_opening_database<KC: 'static, DC: 'static>(
    env: &Env,
    rtxn: &RoTxn,
    db_name: &str,
) -> anyhow::Result<Database<KC, DC>> {
    env.open_database(rtxn, Some(db_name))
        .with_context(|| format!("While opening the {db_name:?} database"))?
        .with_context(|| format!("Missing the {db_name:?} database"))
}

fn try_opening_poly_database(
    env: &Env,
    rtxn: &RoTxn,
    db_name: &str,
) -> anyhow::Result<PolyDatabase> {
    env.open_poly_database(rtxn, Some(db_name))
        .with_context(|| format!("While opening the {db_name:?} poly database"))?
        .with_context(|| format!("Missing the {db_name:?} poly database"))
}

fn try_clearing_poly_database(
    wtxn: &mut RwTxn,
    database: PolyDatabase,
    db_name: &str,
) -> anyhow::Result<()> {
    database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
}

/// Exports a dump into the dump directory.
fn export_a_dump(
    db_path: PathBuf,
    dump_dir: PathBuf,
    skip_enqueued_tasks: bool,
) -> Result<(), anyhow::Error> {
    let started_at = OffsetDateTime::now_utc();

    // 1. Extracts the instance UID from disk
    let instance_uid_path = db_path.join("instance-uid");
    let instance_uid = match read_to_string(&instance_uid_path) {
        Ok(content) => match content.trim().parse() {
            Ok(uuid) => Some(uuid),
            Err(e) => {
                eprintln!("Impossible to parse instance-uid: {e}");
                None
            }
        },
        Err(e) => {
            eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
            None
        }
    };

    let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
    let file_store =
        FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;

    let index_scheduler_path = db_path.join("tasks");
    let env = EnvOpenOptions::new()
        .max_dbs(100)
        .open(&index_scheduler_path)
        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    eprintln!("Dumping the keys...");

    // 2. dump the keys
    let auth_store = AuthController::new(&db_path, &None)
        .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
    let mut dump_keys = dump.create_keys()?;
    let mut count = 0;
    for key in auth_store.list_keys()? {
        dump_keys.push_key(&key)?;
        count += 1;
    }
    dump_keys.flush()?;

    eprintln!("Successfully dumped {count} keys!");

    let rtxn = env.read_txn()?;
    let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
        try_opening_database(&env, &rtxn, "all-tasks")?;
    let index_mapping: Database<Str, UuidCodec> =
        try_opening_database(&env, &rtxn, "index-mapping")?;

    if skip_enqueued_tasks {
        eprintln!("Skip dumping the enqueued tasks...");
    } else {
        eprintln!("Dumping the enqueued tasks...");

        // 3. dump the tasks
        let mut dump_tasks = dump.create_tasks_queue()?;
        let mut count = 0;
        for ret in all_tasks.iter(&rtxn)? {
            let (_, t) = ret?;
            let status = t.status;
            let content_file = t.content_uuid();
            let mut dump_content_file = dump_tasks.push_task(&t.into())?;

            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
            if let Some(content_file_uuid) = content_file {
                if status == Status::Enqueued {
                    let content_file = file_store.get_update(content_file_uuid)?;

                    let reader =
                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
                            format!("While reading content file {:?}", content_file_uuid)
                        })?;

                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
                    while let Some(doc) = cursor.next_document().with_context(|| {
                        format!("While iterating on content file {:?}", content_file_uuid)
                    })? {
                        dump_content_file
                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
                    }
                    dump_content_file.flush()?;
                    count += 1;
                }
            }
        }
        dump_tasks.flush()?;

        eprintln!("Successfully dumped {count} enqueued tasks!");
    }

    eprintln!("Dumping the indexes...");

    // 4. Dump the indexes
    let mut count = 0;
    for result in index_mapping.iter(&rtxn)? {
        let (uid, uuid) = result?;
        let index_path = db_path.join("indexes").join(uuid.to_string());
        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
            format!("While trying to open the index at path {:?}", index_path.display())
        })?;

        let rtxn = index.read_txn()?;
        let metadata = IndexMetadata {
            uid: uid.to_owned(),
            primary_key: index.primary_key(&rtxn)?.map(String::from),
            created_at: index.created_at(&rtxn)?,
            updated_at: index.updated_at(&rtxn)?,
        };
        let mut index_dumper = dump.create_index(uid, &metadata)?;

        let fields_ids_map = index.fields_ids_map(&rtxn)?;
        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

        // 4.1. Dump the documents
        for ret in index.all_documents(&rtxn)? {
            let (_id, doc) = ret?;
            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
            index_dumper.push_document(&document)?;
        }

        // 4.2. Dump the settings
        let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
        index_dumper.settings(&settings)?;
        count += 1;
    }

    eprintln!("Successfully dumped {count} indexes!");
    // We will not dump experimental feature settings
    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");

    let dump_uid = started_at.format(format_description!(
        "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
    )).unwrap();

    let path = dump_dir.join(format!("{}.dump", dump_uid));
    let file = File::create(&path)?;
    dump.persist_to(BufWriter::new(file))?;

    eprintln!("Dump exported at path {:?}", path.display());

    Ok(())
}
meilitool/src/uuid_codec.rs (new file, 24 lines)
@@ -0,0 +1,24 @@
use std::borrow::Cow;
use std::convert::TryInto;

use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;

/// A heed codec for values of type `Uuid`.
pub struct UuidCodec;

impl<'a> BytesDecode<'a> for UuidCodec {
    type DItem = Uuid;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        bytes.try_into().ok().map(Uuid::from_bytes)
    }
}

impl BytesEncode<'_> for UuidCodec {
    type EItem = Uuid;

    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
        Some(Cow::Borrowed(item.as_bytes()))
    }
}
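A minimal round-trip sketch (editorial, not part of the diff): since the codec stores the raw 16-byte representation, encoding then decoding should return the original value.

    let uuid = Uuid::new_v4();
    let bytes = UuidCodec::bytes_encode(&uuid).unwrap();
    assert_eq!(UuidCodec::bytes_decode(&bytes), Some(uuid));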
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+charabia = { version = "0.8.5", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -26,8 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.4", default-features = false, features = [
-    "tempfile",
+grenad = { version = "0.4.5", default-features = false, features = [
+    "rayon", "tempfile"
 ] }
 heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
     "lmdb", "read-txn-no-tls"
@@ -79,10 +79,11 @@ big_s = "1.0.2"
 insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
 meili-snap = { path = "../meili-snap" }
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@@ -106,3 +107,6 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
+
+# allow khmer specialized tokenization
+khmer = ["charabia/khmer"]
@@ -1,4 +1,5 @@
 use std::fs::File;
+use std::io::BufReader;
 use std::{io, str};

 use obkv::KvReader;
@@ -19,14 +20,14 @@ use crate::FieldId;
 pub struct EnrichedDocumentsBatchReader<R> {
     documents: DocumentsBatchReader<R>,
     primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
     pub fn new(
         documents: DocumentsBatchReader<R>,
         primary_key: String,
-        external_ids: grenad::Reader<File>,
+        external_ids: grenad::Reader<BufReader<File>>,
     ) -> Result<Self, Error> {
         if documents.documents_count() as u64 == external_ids.len() {
             Ok(EnrichedDocumentsBatchReader {
@@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
 pub struct EnrichedDocumentsBatchCursor<R> {
     documents: DocumentsBatchCursor<R>,
     primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R> EnrichedDocumentsBatchCursor<R> {
@ -1,5 +1,6 @@
|
||||
mod builder;
|
||||
mod enriched;
|
||||
mod primary_key;
|
||||
mod reader;
|
||||
mod serde_impl;
|
||||
|
||||
@ -11,6 +12,7 @@ use bimap::BiHashMap;
|
||||
pub use builder::DocumentsBatchBuilder;
|
||||
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
|
||||
use obkv::KvReader;
|
||||
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
|
||||
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@ -87,6 +89,12 @@ impl DocumentsBatchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldIdMapper for DocumentsBatchIndex {
|
||||
fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.id(name)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Error parsing number {value:?} at line {line}: {error}")]
|
||||
|
milli/src/documents/primary_key.rs (new file, 172 lines)
@@ -0,0 +1,172 @@
use std::iter;
use std::result::Result as StdResult;

use serde_json::Value;

use crate::{FieldId, InternalError, Object, Result, UserError};

/// The symbol used to define levels in a nested primary key.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';

/// The default primary key that is used when none is specified.
pub const DEFAULT_PRIMARY_KEY: &str = "id";

/// Trait for objects that can map the name of a field to its [`FieldId`].
pub trait FieldIdMapper {
    /// Attempts to map the passed name to its [`FieldId`].
    ///
    /// `None` if the field with this name was not found.
    fn id(&self, name: &str) -> Option<FieldId>;
}

/// A type that represents the kind of primary key that has been set
/// for this index: a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
pub enum PrimaryKey<'a> {
    Flat { name: &'a str, field_id: FieldId },
    Nested { name: &'a str },
}

pub enum DocumentIdExtractionError {
    InvalidDocumentId(UserError),
    MissingDocumentId,
    TooManyDocumentIds(usize),
}

impl<'a> PrimaryKey<'a> {
    pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
        Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
            Self::Nested { name: path }
        } else {
            let field_id = fields.id(path)?;
            Self::Flat { name: path, field_id }
        })
    }

    pub fn name(&self) -> &str {
        match self {
            PrimaryKey::Flat { name, .. } => name,
            PrimaryKey::Nested { name } => name,
        }
    }

    pub fn document_id(
        &self,
        document: &obkv::KvReader<FieldId>,
        fields: &impl FieldIdMapper,
    ) -> Result<StdResult<String, DocumentIdExtractionError>> {
        match self {
            PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
                Some(document_id_bytes) => {
                    let document_id = serde_json::from_slice(document_id_bytes)
                        .map_err(InternalError::SerdeJson)?;
                    match validate_document_id_value(document_id)? {
                        Ok(document_id) => Ok(Ok(document_id)),
                        Err(user_error) => {
                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
                        }
                    }
                }
                None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
            },
            nested @ PrimaryKey::Nested { .. } => {
                let mut matching_documents_ids = Vec::new();
                for (first_level_name, right) in nested.possible_level_names() {
                    if let Some(field_id) = fields.id(first_level_name) {
                        if let Some(value_bytes) = document.get(field_id) {
                            let object = serde_json::from_slice(value_bytes)
                                .map_err(InternalError::SerdeJson)?;
                            fetch_matching_values(object, right, &mut matching_documents_ids);

                            if matching_documents_ids.len() >= 2 {
                                return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
                                    matching_documents_ids.len(),
                                )));
                            }
                        }
                    }
                }

                match matching_documents_ids.pop() {
                    Some(document_id) => match validate_document_id_value(document_id)? {
                        Ok(document_id) => Ok(Ok(document_id)),
                        Err(user_error) => {
                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
                        }
                    },
                    None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
                }
            }
        }
    }

    /// Returns an `Iterator` that gives all the possible field names the primary key
    /// can have, depending on the first-level name and the depth of the objects.
    pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        let name = self.name();
        name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
            .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
            .chain(iter::once((name, "")))
    }
}
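// Worked example (editorial note, not part of the original diff): for a
// primary key named "a.b.c", `possible_level_names` yields ("a", "b.c"),
// then ("a.b", "c"), and finally ("a.b.c", ""), i.e. every split point in
// order, plus the full name with an empty remainder.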

fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
    match value {
        Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
        otherwise => output.push(otherwise),
    }
}

fn fetch_matching_values_in_object(
    object: Object,
    selector: &str,
    base_key: &str,
    output: &mut Vec<Value>,
) {
    for (key, value) in object {
        let base_key = if base_key.is_empty() {
            key.to_string()
        } else {
            format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
        };

        if starts_with(selector, &base_key) {
            match value {
                Value::Object(object) => {
                    fetch_matching_values_in_object(object, selector, &base_key, output)
                }
                value => output.push(value),
            }
        }
    }
}

fn starts_with(selector: &str, key: &str) -> bool {
    selector.strip_prefix(key).map_or(false, |tail| {
        tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
    })
}
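// Editorial examples (not part of the original diff): starts_with("a.b", "a")
// is true because the tail begins with the split symbol; starts_with("a.b",
// "a.b") is true because the tail is empty; starts_with("ab.c", "a") is false
// because the next character is 'b', not the split symbol.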

// FIXME: move to a DocumentId struct

fn validate_document_id(document_id: &str) -> Option<&str> {
    if !document_id.is_empty()
        && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
    {
        Some(document_id)
    } else {
        None
    }
}

pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
    match document_id {
        Value::String(string) => match validate_document_id(&string) {
            Some(s) if s.len() == string.len() => Ok(Ok(string)),
            Some(s) => Ok(Ok(s.to_string())),
            None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
        },
        Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
    }
}
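// Editorial examples (not part of the original diff): a string id such as
// "doc_1" is accepted as-is, a string containing spaces or punctuation yields
// UserError::InvalidDocumentId, an integer like 42 is stringified to "42",
// and a float such as 4.2 is rejected because only i64 numbers are allowed.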
@@ -89,8 +89,6 @@ pub enum FieldIdMapMissingEntry {

 #[derive(Error, Debug)]
 pub enum UserError {
-    #[error("A soft deleted internal document id have been used: `{document_id}`.")]
-    AccessingSoftDeletedDocument { document_id: DocumentId },
     #[error("A document cannot contain more than 65,535 fields.")]
     AttributeLimitReached,
     #[error(transparent)]
@@ -1,159 +1,75 @@
-use std::borrow::Cow;
 use std::collections::HashMap;
-use std::convert::TryInto;
-use std::{fmt, str};

-use fst::map::IndexedValue;
-use fst::{IntoStreamer, Streamer};
-use roaring::RoaringBitmap;
+use heed::types::{OwnedType, Str};
+use heed::{Database, RoIter, RoTxn, RwTxn};

-const DELETED_ID: u64 = u64::MAX;
+use crate::{DocumentId, BEU32};

-pub struct ExternalDocumentsIds<'a> {
-    pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
-    pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
-    soft_deleted_docids: RoaringBitmap,
+pub enum DocumentOperationKind {
+    Create,
+    Delete,
 }

-impl<'a> ExternalDocumentsIds<'a> {
-    pub fn new(
-        hard: fst::Map<Cow<'a, [u8]>>,
-        soft: fst::Map<Cow<'a, [u8]>>,
-        soft_deleted_docids: RoaringBitmap,
-    ) -> ExternalDocumentsIds<'a> {
-        ExternalDocumentsIds { hard, soft, soft_deleted_docids }
-    }
+pub struct DocumentOperation {
+    pub external_id: String,
+    pub internal_id: DocumentId,
+    pub kind: DocumentOperationKind,
+}

-    pub fn into_static(self) -> ExternalDocumentsIds<'static> {
-        ExternalDocumentsIds {
-            hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft_deleted_docids: self.soft_deleted_docids,
-        }
+pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
+
+impl ExternalDocumentsIds {
+    pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
+        ExternalDocumentsIds(db)
     }

     /// Returns `true` if hard and soft external documents lists are empty.
-    pub fn is_empty(&self) -> bool {
-        self.hard.is_empty() && self.soft.is_empty()
+    pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
+        self.0.is_empty(rtxn).map_err(Into::into)
     }

-    pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
-        let external_id = external_id.as_ref();
-        match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
-            Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
-                Some(id.try_into().unwrap())
-            }
-            _otherwise => None,
-        }
-    }
-
-    /// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
-    /// don't contain any soft deleted document id.
-    pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
-        let mut new_hard_builder = fst::MapBuilder::memory();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, docids)) = iter.next() {
-            // prefer selecting the ids from soft, always
-            let id = indexed_last_value(docids).unwrap();
-            if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
-                new_hard_builder.insert(external_id, id)?;
-            }
-        }
-        drop(iter);
-
-        // Delete soft map completely
-        self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        // We save the new map as the new hard map.
-        self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
-
-        Ok(())
-    }
-
-    pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
-        let union_op = self.soft.op().add(other).r#union();
-
-        let mut new_soft_builder = fst::MapBuilder::memory();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            new_soft_builder.insert(external_id, id)?;
-        }
-
-        drop(iter);
-
-        // We save the new map as the new soft map.
-        self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
-        self.merge_soft_into_hard()
+    pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
+        Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
     }

-    /// An helper function to debug this type, returns an `HashMap` of both,
-    /// soft and hard fst maps, combined.
-    pub fn to_hash_map(&self) -> HashMap<String, u32> {
-        let mut map = HashMap::new();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            if id != DELETED_ID {
-                let external_id = str::from_utf8(external_id).unwrap();
-                map.insert(external_id.to_owned(), id.try_into().unwrap());
-            }
+    pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
+        let mut map = HashMap::default();
+        for result in self.0.iter(rtxn)? {
+            let (external, internal) = result?;
+            map.insert(external.to_owned(), internal.get());
         }
-
-        map
+        Ok(map)
     }

-    /// Return an fst of the combined hard and soft deleted ID.
-    pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
-        if self.soft.is_empty() {
-            return Ok(Cow::Borrowed(&self.hard));
-        }
-        let union_op = self.hard.op().add(&self.soft).r#union();
-
-        let mut iter = union_op.into_stream();
-        let mut new_hard_builder = fst::MapBuilder::memory();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let value = indexed_last_value(marked_docids).unwrap();
-            if value != DELETED_ID {
-                new_hard_builder.insert(external_id, value)?;
-            }
-        }
-
-        drop(iter);
-
-        Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
-    }
-
-    fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
-        if self.soft.len() >= self.hard.len() / 2 {
-            self.hard = self.to_fst()?.into_owned();
-            self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        }
+    /// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
+    ///
+    /// If the list contains multiple operations on the same external id, then the result is unspecified.
+    ///
+    /// # Panics
+    ///
+    /// - If attempting to delete a document that doesn't exist
+    /// - If attempting to create a document that already exists
+    pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
+        for DocumentOperation { external_id, internal_id, kind } in operations {
+            match kind {
+                DocumentOperationKind::Create => {
+                    self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
+                }
+                DocumentOperationKind::Delete => {
+                    if !self.0.delete(wtxn, &external_id)? {
+                        panic!("Attempting to delete a non-existing document")
+                    }
+                }
+            }
+        }

         Ok(())
     }
-}

-impl fmt::Debug for ExternalDocumentsIds<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
+    /// Returns an iterator over all the external ids.
+    pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
+        self.0.iter(rtxn)
     }
 }
-
-impl Default for ExternalDocumentsIds<'static> {
-    fn default() -> Self {
-        ExternalDocumentsIds {
-            hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft_deleted_docids: RoaringBitmap::new(),
-        }
-    }
-}
-
-/// Returns the value of the `IndexedValue` with the highest _index_.
-fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
-    indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
-}
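A minimal editorial sketch of driving the new API; the `external_ids` and `wtxn` bindings are hypothetical and assume an `ExternalDocumentsIds` wrapping an open database plus a live write transaction:

    let operations = vec![DocumentOperation {
        external_id: "doc-1".to_string(),
        internal_id: 42,
        kind: DocumentOperationKind::Create,
    }];
    external_ids.apply(&mut wtxn, operations)?;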
@@ -81,6 +81,12 @@ impl Default for FieldsIdsMap {
     }
 }

+impl crate::documents::FieldIdMapper for FieldsIdsMap {
+    fn id(&self, name: &str) -> Option<FieldId> {
+        self.id(name)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
 use roaring::RoaringBitmap;

 use crate::heed_codec::BytesDecodeOwned;
+use crate::update::del_add::{DelAdd, KvReaderDelAdd};

 /// This is the limit where using a byteorder became less size efficient
 /// than using a direct roaring encoding, it is also the point where we are able
@@ -60,12 +61,16 @@ impl CboRoaringBitmapCodec {
     /// if the merged values length is under the threshold, values are directly
     /// serialized in the buffer else a RoaringBitmap is created from the
     /// values and is serialized in the buffer.
-    pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
+    pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
+    where
+        I: IntoIterator<Item = A>,
+        A: AsRef<[u8]>,
+    {
         let mut roaring = RoaringBitmap::new();
         let mut vec = Vec::new();

         for bytes in slices {
-            if bytes.len() <= THRESHOLD * size_of::<u32>() {
+            if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
                 let mut reader = bytes.as_ref();
                 while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                     vec.push(integer);
@@ -85,7 +90,7 @@ impl CboRoaringBitmapCodec {
             }
         } else {
             // We can unwrap safely because the vector is sorted upper.
-            let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
+            let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
             roaring.serialize_into(buffer)?;
         }
     } else {
@@ -95,6 +100,33 @@ impl CboRoaringBitmapCodec {

         Ok(())
     }
+
+    /// Merges a DelAdd delta into a CboRoaringBitmap.
+    pub fn merge_deladd_into<'a>(
+        deladd: KvReaderDelAdd<'_>,
+        previous: &[u8],
+        buffer: &'a mut Vec<u8>,
+    ) -> io::Result<Option<&'a [u8]>> {
+        // Deserialize the bitmap that is already there
+        let mut previous = Self::deserialize_from(previous)?;
+
+        // Remove integers we no longer want in the previous bitmap
+        if let Some(value) = deladd.get(DelAdd::Deletion) {
+            previous -= Self::deserialize_from(value)?;
+        }
+
+        // Insert the new integers we want in the previous bitmap
+        if let Some(value) = deladd.get(DelAdd::Addition) {
+            previous |= Self::deserialize_from(value)?;
+        }
+
+        if previous.is_empty() {
+            return Ok(None);
+        }
+
+        Self::serialize_into(&previous, buffer);
+        Ok(Some(&buffer[..]))
+    }
 }
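The set algebra implemented by the new `merge_deladd_into` helper, as a minimal editorial sketch with plain `RoaringBitmap`s (building a real `KvReaderDelAdd` is out of scope here):

    let previous: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let deletion: RoaringBitmap = [2u32].into_iter().collect();
    let addition: RoaringBitmap = [4u32].into_iter().collect();
    // (previous - deletions) | additions, i.e. {1, 3, 4}
    assert_eq!((previous - deletion) | addition, [1u32, 3, 4].into_iter().collect::<RoaringBitmap>());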
 impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {

(File diff suppressed because it is too large.)
@@ -2,7 +2,7 @@ use std::cmp;

 use crate::{relative_from_absolute_position, Position};

-pub const MAX_DISTANCE: u32 = 8;
+pub const MAX_DISTANCE: u32 = 4;

 pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
     if lhs <= rhs {
@@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
 /// The documents returned by the iterator are grouped by the facet values that
 /// determined their rank. For example, given the documents:
 ///
-/// ```ignore
+/// ```text
 /// 0: { "colour": ["blue", "green"] }
 /// 1: { "colour": ["blue", "red"] }
 /// 2: { "colour": ["orange", "red"] }
@@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
 /// ```
 /// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
 /// over the following elements:
-/// ```ignore
+/// ```text
 /// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
 /// [3] // same for "green"
 /// [2] // same for "orange"
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
 use std::ops::Bound::{self, Excluded, Included};

 use either::Either;
-pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
+pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
 use roaring::RoaringBitmap;
 use serde_json::Value;

@@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
 impl<'a> Filter<'a> {
     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
-        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
-        let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
         let filterable_fields = index.filterable_fields(rtxn)?;

-        // and finally we delete all the soft_deleted_documents, again, only once at the very end
         self.inner_evaluate(rtxn, index, &filterable_fields)
-            .map(|result| result - soft_deleted_documents)
     }

     fn evaluate_operator(
@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
-pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
+pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
 use self::new::PartialSearchResult;
 use crate::error::UserError;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
@@ -53,11 +53,22 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             if excluded.contains(docid) {
                 continue;
             }

             distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
             results.push(docid);
         }

         let mut all_candidates = universe - excluded;
+        all_candidates.extend(results.iter().copied());
+        // drain the results of the skipped elements
+        // this **must** be done **after** writing the entire results in `all_candidates` to ensure
+        // e.g. estimatedTotalHits is correct.
+        if results.len() >= from {
+            results.drain(..from);
+        } else {
+            results.clear();
+        }

         return Ok(BucketSortOutput {
             scores: vec![Default::default(); results.len()],
             docids: results,
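// Editorial note (not part of the original diff): with from = 2 and five
// distinct results, all five docids are first copied into `all_candidates`,
// and only then are the first two drained off, so estimatedTotalHits keeps
// counting every distinct candidate that was seen.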
@@ -12,7 +12,7 @@ use super::Word;
 use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
 use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
 use crate::{
-    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
+    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
 };

 /// A cache storing pointers to values in the LMDB databases.
@@ -25,7 +25,7 @@ pub struct DatabaseCache<'ctx> {
     pub word_pair_proximity_docids:
         FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
     pub word_prefix_pair_proximity_docids:
-        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
     pub prefix_word_pair_proximity_docids:
         FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
     pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
@@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
                     merge_cbo_roaring_bitmaps,
                 )
             }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
                 self.txn,
                 word,
                 self.word_interner.get(word).as_str(),
@@ -182,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
         &mut self,
         word: Interned<String>,
     ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
             self.txn,
             word,
             self.word_interner.get(word).as_str(),
@@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
                     merge_cbo_roaring_bitmaps,
                 )
             }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
                 self.txn,
                 prefix,
                 self.word_interner.get(prefix).as_str(),
@@ -244,7 +244,7 @@ impl<'ctx> SearchContext<'ctx> {
         &mut self,
         prefix: Interned<String>,
     ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
             self.txn,
             prefix,
             self.word_interner.get(prefix).as_str(),
@@ -297,35 +297,47 @@ impl<'ctx> SearchContext<'ctx> {
         prefix2: Interned<String>,
         proximity: u8,
     ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-            self.txn,
-            (proximity, word1, prefix2),
-            &(
-                proximity,
-                self.word_interner.get(word1).as_str(),
-                self.word_interner.get(prefix2).as_str(),
-            ),
-            &mut self.db_cache.word_prefix_pair_proximity_docids,
-            self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        let docids = match self
+            .db_cache
+            .word_prefix_pair_proximity_docids
+            .entry((proximity, word1, prefix2))
+        {
+            Entry::Occupied(docids) => docids.get().clone(),
+            Entry::Vacant(entry) => {
+                // compute docids using prefix iter and store the result in the cache.
+                let key = U8StrStrCodec::bytes_encode(&(
+                    proximity,
+                    self.word_interner.get(word1).as_str(),
+                    self.word_interner.get(prefix2).as_str(),
+                ))
+                .unwrap()
+                .into_owned();
+                let mut prefix_docids = RoaringBitmap::new();
+                let remap_key_type = self
+                    .index
+                    .word_pair_proximity_docids
+                    .remap_key_type::<ByteSlice>()
+                    .prefix_iter(self.txn, &key)?;
+                for result in remap_key_type {
+                    let (_, docids) = result?;
+
+                    prefix_docids |= docids;
+                }
+                entry.insert(Some(prefix_docids.clone()));
+                Some(prefix_docids)
+            }
+        };
+        Ok(docids)
     }

     pub fn get_db_prefix_word_pair_proximity_docids(
         &mut self,
         left_prefix: Interned<String>,
         right: Interned<String>,
         proximity: u8,
     ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-            self.txn,
-            (proximity, left_prefix, right),
-            &(
-                proximity,
-                self.word_interner.get(left_prefix).as_str(),
-                self.word_interner.get(right).as_str(),
-            ),
-            &mut self.db_cache.prefix_word_pair_proximity_docids,
-            self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        // only accept exact matches on reverted positions
+        self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
     }

     pub fn get_db_word_fid_docids(
@@ -434,7 +434,18 @@ pub fn execute_search(
         let mut search = Search::default();
         let docids = match ctx.index.vector_hnsw(ctx.txn)? {
             Some(hnsw) => {
+                if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
+                    if vector.len() != expected_size {
+                        return Err(UserError::InvalidVectorDimensions {
+                            expected: expected_size,
+                            found: vector.len(),
+                        }
+                        .into());
+                    }
+                }
+
                 let vector = NDotProductPoint::new(vector.clone());

                 let neighbors = hnsw.search(&vector, &mut search);

                 let mut docids = Vec::new();
@@ -29,7 +29,7 @@ use std::hash::Hash;
 pub use cheapest_paths::PathVisitor;
 pub use condition_docids_cache::ConditionDocIdsCache;
 pub use dead_ends_cache::DeadEndsCache;
-pub use exactness::{ExactnessCondition, ExactnessGraph};
+pub use exactness::ExactnessGraph;
 pub use fid::{FidCondition, FidGraph};
 pub use position::{PositionCondition, PositionGraph};
 pub use proximity::{ProximityCondition, ProximityGraph};
@@ -1,6 +1,7 @@
 #![allow(clippy::too_many_arguments)]

 use super::ProximityCondition;
+use crate::proximity::MAX_DISTANCE;
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::SearchContext;
@@ -35,7 +36,7 @@ pub fn build_edges(
     }

     let mut conditions = vec![];
-    for cost in right_ngram_max..(7 + right_ngram_max) {
+    for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
         conditions.push((
             cost as u32,
             conditions_interner.insert(ProximityCondition::Uninit {
@@ -47,7 +48,7 @@ pub fn build_edges(
     }

     conditions.push((
-        (7 + right_ngram_max) as u32,
+        ((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
         conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
     ));
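// Editorial note (not part of the original diff): with MAX_DISTANCE now 4 and
// right_ngram_max = n, the loop yields costs n, n + 1 and n + 2, and the final
// Term condition is pushed at cost n + 3, whereas the old code hardcoded the
// former MAX_DISTANCE of 8 as the range n..n + 7 with the Term at cost n + 7.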
|
||||
|
||||
|
@ -273,7 +273,7 @@ fn test_proximity_simple() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
@ -282,11 +282,11 @@ fn test_proximity_simple() {
|
||||
"\"the quickbrown fox jumps over the lazy dog\"",
|
||||
"\"the really quick brown fox jumps over the lazy dog\"",
|
||||
"\"the really quick brown fox jumps over the very lazy dog\"",
|
||||
"\"brown quick fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy. dog\"",
|
||||
"\"dog the quick brown fox jumps over the lazy\"",
|
||||
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
||||
"\"brown quick fox jumps over the lazy dog\"",
|
||||
"\"the. quick brown fox jumps over the lazy. dog\"",
|
||||
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
@@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best s");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
     insta::assert_debug_snapshot!(texts, @r###"
     [
         "\"this is the best summer meal\"",
-        "\"summer best\"",
         "\"this is the best meal of summer\"",
-        "\"summer x best\"",
-        "\"this is the best meal of the summer\"",
         "\"this is the best meal I have ever had in such a beautiful summer day\"",
         "\"this is the best cooked meal of the summer\"",
+        "\"this is the best meal of the summer\"",
         "\"summer x y best\"",
+        "\"summer x best\"",
+        "\"summer best\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
     "###);
@@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best su");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
         "\"summer best\"",
         "\"this is the best meal of summer\"",
         "\"summer x best\"",
+        "\"this is the best meal I have ever had in such a beautiful summer day\"",
+        "\"this is the best cooked meal of the summer\"",
         "\"this is the best meal of the summer\"",
         "\"summer x y best\"",
-        "\"this is the best cooked meal of the summer\"",
-        "\"this is the best meal I have ever had in such a beautiful summer day\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
     "###);
@@ -423,17 +423,17 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best win");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);

     insta::assert_debug_snapshot!(texts, @r###"
     [
+        "\"this is the best winter meal\"",
+        "\"this is the best meal of winter\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
         "\"this is the best cooked meal of the winter\"",
         "\"this is the best meal of the winter\"",
-        "\"this is the best meal of winter\"",
-        "\"this is the best winter meal\"",
         "\"winter x y best\"",
         "\"winter x best\"",
         "\"winter best\"",
@@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best wint");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
         "\"winter best\"",
         "\"this is the best meal of winter\"",
         "\"winter x best\"",
+        "\"this is the best meal I have ever had in such a beautiful winter day\"",
+        "\"this is the best cooked meal of the winter\"",
         "\"this is the best meal of the winter\"",
         "\"winter x y best\"",
-        "\"this is the best cooked meal of the winter\"",
-        "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
     "###);

@@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best wi");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best winter meal\"",
-        "\"winter best\"",
        "\"this is the best meal of winter\"",
-        "\"winter x best\"",
-        "\"this is the best meal of the winter\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
+        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
+        "\"winter x best\"",
+        "\"winter best\"",
    ]
    "###);
 }
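The long run of snapshot updates that follows is mechanical: every proximity `Rank` keeps its relative position while its scale shrinks. The figures are consistent with `max_rank = pairs * (MAX_DISTANCE - 1) + 1` and the maximum distance dropping from 8 to 4: 57 becomes 25 for an eight-pair query, 50 becomes 22, 15 becomes 7, and 8 becomes 4 for a single pair. A small sketch checking that reading (the formula is inferred from the snapshot values, not stated in this diff):

// Inferred relation between the old (MAX_DISTANCE = 8) and new
// (MAX_DISTANCE = 4) snapshot values: one rank bucket per possible
// distance and word pair, plus one.
fn max_rank(word_pairs: u32, max_distance: u32) -> u32 {
    word_pairs * (max_distance - 1) + 1
}

fn main() {
    // Values taken from the snapshot hunks in this diff.
    for (pairs, old, new) in [(1, 8, 4), (2, 15, 7), (7, 50, 22), (8, 57, 25)] {
        assert_eq!(max_rank(pairs, 8), old);
        assert_eq!(max_rank(pairs, 4), new);
    }
}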
@@ -68,8 +68,8 @@ fn test_trap_basic() {
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
         Typo(
@@ -82,8 +82,8 @@ fn test_trap_basic() {
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
         Typo(

@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 35,
-                max_rank: 57,
+                rank: 9,
+                max_rank: 25,
             },
         ),
     ],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 35,
-                max_rank: 57,
+                rank: 9,
+                max_rank: 25,
             },
         ),
     ],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 35,
-                max_rank: 57,
+                rank: 9,
+                max_rank: 25,
             },
         ),
     ],
@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 35,
-                max_rank: 57,
+                rank: 9,
+                max_rank: 25,
             },
         ),
     ],
@@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 22,
+                rank: 9,
+                max_rank: 10,
             },
         ),
     ],
@@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 17,
-                max_rank: 22,
+                rank: 5,
+                max_rank: 10,
             },
         ),
     ],
@@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 17,
-                max_rank: 22,
+                rank: 5,
+                max_rank: 10,
             },
         ),
     ],
@@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
 expression: "format!(\"{document_scores:#?}\")"
 ---
 [
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
     [
         Proximity(
             Rank {
                 rank: 4,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -6,40 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
     [
         Proximity(
             Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
             },
         ),
     ],
@@ -47,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -55,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -6,40 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
     [
         Proximity(
             Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
             },
         ),
     ],
@@ -47,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -55,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
 expression: "format!(\"{document_scores:#?}\")"
 ---
 [
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 7,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 6,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 5,
-                max_rank: 8,
-            },
-        ),
-    ],
     [
         Proximity(
             Rank {
                 rank: 4,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -6,8 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 1,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
@@ -15,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -23,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -31,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -39,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -47,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -55,15 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
@@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],

@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],

@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 8,
+                max_rank: 4,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 5,
-                max_rank: 8,
+                rank: 1,
+                max_rank: 4,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 8,
+                rank: 3,
+                max_rank: 4,
             },
         ),
     ],

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 15,
+                rank: 4,
+                max_rank: 7,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 50,
-                max_rank: 50,
+                rank: 22,
+                max_rank: 22,
             },
         ),
     ],
@@ -24,132 +24,6 @@ expression: "format!(\"{document_scores:#?}\")"
                 max_matching_words: 9,
             },
         ),
-        Proximity(
-            Rank {
-                rank: 50,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 49,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 49,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 48,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 41,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 9,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 40,
-                max_rank: 50,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 8,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 43,
-                max_rank: 43,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 7,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 36,
-                max_rank: 36,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 7,
-                max_matching_words: 9,
-            },
-        ),
-        Proximity(
-            Rank {
-                rank: 31,
-                max_rank: 36,
-            },
-        ),
-    ],
-    [
-        Words(
-            Words {
-                matching_words: 5,
-                max_matching_words: 9,
-            },
-        ),
         Proximity(
             Rank {
                 rank: 22,
@@ -160,14 +34,126 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Words(
             Words {
-                matching_words: 4,
+                matching_words: 9,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 21,
+                max_rank: 22,
             },
         ),
     ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 21,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 20,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 17,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 9,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 16,
+                max_rank: 22,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 8,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 19,
+                max_rank: 19,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 7,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 16,
+                max_rank: 16,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 7,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 13,
+                max_rank: 16,
+            },
+        ),
+    ],
+    [
+        Words(
+            Words {
+                matching_words: 5,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 10,
+                max_rank: 10,
+            },
+        ),
+    ],
@@ -180,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
             },
         ),
     ],
@@ -194,8 +180,22 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
             },
         ),
     ],
+    [
+        Words(
+            Words {
+                matching_words: 4,
+                max_matching_words: 9,
+            },
+        ),
+        Proximity(
+            Rank {
+                rank: 7,
+                max_rank: 7,
+            },
+        ),
+    ],
@@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 8,
-                max_rank: 8,
+                rank: 4,
+                max_rank: 4,
             },
         ),
     ],

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 42,
-                max_rank: 43,
+                rank: 18,
+                max_rank: 19,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 42,
-                max_rank: 43,
+                rank: 18,
+                max_rank: 19,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 41,
-                max_rank: 43,
+                rank: 17,
+                max_rank: 19,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 34,
-                max_rank: 43,
+                rank: 14,
+                max_rank: 19,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 33,
-                max_rank: 43,
+                rank: 13,
+                max_rank: 19,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 36,
-                max_rank: 36,
+                rank: 16,
+                max_rank: 16,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 29,
-                max_rank: 29,
+                rank: 13,
+                max_rank: 13,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 29,
+                rank: 10,
+                max_rank: 13,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 51,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 48,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
             },
         ),
     ],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 57,
+                max_rank: 25,
             },
         ),
     ],
@@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 50,
-                max_rank: 50,
+                rank: 22,
+                max_rank: 22,
             },
         ),
     ],
@@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 43,
-                max_rank: 43,
+                rank: 19,
+                max_rank: 19,
             },
         ),
     ],
@@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 38,
-                max_rank: 43,
+                rank: 16,
+                max_rank: 19,
             },
         ),
     ],
@@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 29,
-                max_rank: 29,
+                rank: 13,
+                max_rank: 13,
             },
         ),
     ],
@@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 10,
+                max_rank: 10,
             },
         ),
     ],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 15,
-                max_rank: 15,
+                rank: 7,
+                max_rank: 7,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 18,
+                max_rank: 25,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 45,
-                max_rank: 57,
+                rank: 18,
+                max_rank: 25,
             },
         ),
     ],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 57,
+                max_rank: 25,
             },
         ),
     ],
@@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 47,
-                max_rank: 50,
+                rank: 19,
+                max_rank: 22,
             },
         ),
     ],
@@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 40,
-                max_rank: 43,
+                rank: 16,
+                max_rank: 19,
             },
         ),
     ],
@@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 35,
-                max_rank: 43,
+                rank: 13,
+                max_rank: 19,
             },
         ),
     ],
@@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 26,
-                max_rank: 29,
+                rank: 10,
+                max_rank: 13,
             },
         ),
     ],
@@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
             },
         ),
     ],
@@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
             },
         ),
     ],
@@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 22,
+                rank: 7,
+                max_rank: 10,
             },
         ),
     ],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 13,
-                max_rank: 15,
+                rank: 5,
+                max_rank: 7,
             },
         ),
     ],
@@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 57,
-                max_rank: 57,
+                rank: 25,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 56,
-                max_rank: 57,
+                rank: 24,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 55,
-                max_rank: 57,
+                rank: 23,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 54,
-                max_rank: 57,
+                rank: 22,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 53,
-                max_rank: 57,
+                rank: 21,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 52,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 51,
-                max_rank: 57,
+                rank: 20,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 48,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 47,
-                max_rank: 57,
+                rank: 19,
+                max_rank: 25,
             },
         ),
     ],
@@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 57,
+                max_rank: 25,
             },
         ),
     ],
@@ -13,6 +13,7 @@ This module tests the `sort` ranking rule:

 use big_s::S;
 use maplit::hashset;
+use meili_snap::insta;

 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;

@@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 8,
+                rank: 3,
+                max_rank: 4,
             },
         ),
         Fid(
@@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
         ),
         Proximity(
             Rank {
-                rank: 6,
-                max_rank: 8,
+                rank: 2,
+                max_rank: 4,
             },
         ),
         Fid(
@@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

     // 7 is better than 6 because of the proximity between "the" and its surrounding terms
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
         "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
         "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
         "\"the quick brown fox jumps over the lazy\"",
         "\"the quick brown fox jumps over the\"",
@@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

     // 10 is better than 9 because of the proximity between "quick" and "brown"
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
         "\"the great quick brown fox jumps over the lazy dog\"",
         "\"the quick brown fox jumps over the really lazy dog\"",
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
-        "\"this quick brown and scary fox jumps over the lazy dog\"",
         "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
         "\"the quick brown fox jumps over the lazy\"",
         "\"the quick brown fox jumps over the\"",
@@ -427,7 +427,7 @@ fn test_words_tms_all() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -439,10 +439,10 @@ fn test_words_tms_all() {
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
         "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
         "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
     ]
     "###);
@@ -4,9 +4,8 @@ use std::path::Path;

 use roaring::RoaringBitmap;

-use crate::facet::FacetType;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
-use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
+use crate::{make_db_snap_from_iter, obkv_to_json, Index};

 #[track_caller]
 pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@@ -98,7 +97,6 @@ Create a snapshot test of the given database.
 - `facet_id_string_docids`
 - `documents_ids`
 - `stop_words`
-- `soft_deleted_documents_ids`
 - `field_distribution`
 - `fields_ids_map`
 - `geo_faceted_documents_ids`
@@ -221,22 +219,6 @@ pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
         &format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
     })
 }
-pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
-    make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
-        (proximity, word1, prefix),
-        b,
-    )| {
-        &format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))
-    })
-}
-pub fn snap_prefix_word_pair_proximity_docids(index: &Index) -> String {
-    make_db_snap_from_iter!(index, prefix_word_pair_proximity_docids, |(
-        (proximity, prefix, word2),
-        b,
-    )| {
-        &format!("{proximity:<2} {prefix:<4} {word2:<16} {}", display_bitmap(&b))
-    })
-}
 pub fn snap_word_position_docids(index: &Index) -> String {
     make_db_snap_from_iter!(index, word_position_docids, |((word, position), b)| {
         &format!("{word:<16} {position:<6} {}", display_bitmap(&b))
@@ -308,12 +290,6 @@ pub fn snap_stop_words(index: &Index) -> String {
     let snap = format!("{stop_words:?}");
     snap
 }
-pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap();
-
-    display_bitmap(&soft_deleted_documents_ids)
-}
 pub fn snap_field_distributions(index: &Index) -> String {
     let rtxn = index.read_txn().unwrap();
     let mut snap = String::new();
@@ -340,50 +316,21 @@ pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
 }
 pub fn snap_external_documents_ids(index: &Index) -> String {
     let rtxn = index.read_txn().unwrap();
-    let ExternalDocumentsIds { soft, hard, .. } = index.external_documents_ids(&rtxn).unwrap();
+    let external_ids = index.external_documents_ids().to_hash_map(&rtxn).unwrap();
+    // ensure fixed order (not guaranteed by hashmap)
+    let mut external_ids: Vec<(String, u32)> = external_ids.into_iter().collect();
+    external_ids.sort_by(|(l, _), (r, _)| l.cmp(r));

     let mut snap = String::new();

-    writeln!(&mut snap, "soft:").unwrap();
-    let stream_soft = soft.stream();
-    let soft_external_ids = stream_soft.into_str_vec().unwrap();
-    for (key, id) in soft_external_ids {
-        writeln!(&mut snap, "{key:<24} {id}").unwrap();
-    }
-    writeln!(&mut snap, "hard:").unwrap();
-    let stream_hard = hard.stream();
-    let hard_external_ids = stream_hard.into_str_vec().unwrap();
-    for (key, id) in hard_external_ids {
+    writeln!(&mut snap, "docids:").unwrap();
+    for (key, id) in external_ids {
         writeln!(&mut snap, "{key:<24} {id}").unwrap();
     }

     snap
 }
-pub fn snap_number_faceted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-    let mut snap = String::new();
-    for field_id in fields_ids_map.ids() {
-        let number_faceted_documents_ids =
-            index.faceted_documents_ids(&rtxn, field_id, FacetType::Number).unwrap();
-        writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids))
-            .unwrap();
-    }
-    snap
-}
-pub fn snap_string_faceted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-
-    let mut snap = String::new();
-    for field_id in fields_ids_map.ids() {
-        let string_faceted_documents_ids =
-            index.faceted_documents_ids(&rtxn, field_id, FacetType::String).unwrap();
-        writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids))
-            .unwrap();
-    }
-    snap
-}
 pub fn snap_words_fst(index: &Index) -> String {
     let rtxn = index.read_txn().unwrap();
     let words_fst = index.words_fst(&rtxn).unwrap();
@@ -516,9 +463,6 @@ macro_rules! full_snap_of_db {
     ($index:ident, stop_words) => {{
         $crate::snapshot_tests::snap_stop_words(&$index)
     }};
-    ($index:ident, soft_deleted_documents_ids) => {{
-        $crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
-    }};
     ($index:ident, field_distribution) => {{
         $crate::snapshot_tests::snap_field_distributions(&$index)
     }};
@@ -531,12 +475,6 @@ macro_rules! full_snap_of_db {
     ($index:ident, external_documents_ids) => {{
         $crate::snapshot_tests::snap_external_documents_ids(&$index)
     }};
-    ($index:ident, number_faceted_documents_ids) => {{
-        $crate::snapshot_tests::snap_number_faceted_documents_ids(&$index)
-    }};
-    ($index:ident, string_faceted_documents_ids) => {{
-        $crate::snapshot_tests::snap_string_faceted_documents_ids(&$index)
-    }};
     ($index:ident, words_fst) => {{
        $crate::snapshot_tests::snap_words_fst(&$index)
     }};
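The new `snap_external_documents_ids` above reads the whole mapping into a hash map and sorts it before rendering, since hash map iteration order would make the snapshot flaky. A generic sketch of the same determinism pattern (the names here are illustrative, not milli's API):

use std::collections::HashMap;
use std::fmt::Write;

/// Render a doc-id mapping deterministically: collect, sort by key,
/// then format one `key id` line per entry, as the snapshot helper does.
fn render_ids(ids: &HashMap<String, u32>) -> String {
    let mut entries: Vec<(&String, &u32)> = ids.iter().collect();
    // Hash map order is arbitrary; sorting keeps snapshots stable.
    entries.sort_by(|(l, _), (r, _)| l.cmp(r));

    let mut snap = String::from("docids:\n");
    for (key, id) in entries {
        writeln!(&mut snap, "{key:<24} {id}").unwrap();
    }
    snap
}

fn main() {
    let ids = HashMap::from([("b".to_string(), 1), ("a".to_string(), 0)]);
    let snap = render_ids(&ids);
    // "a" always sorts before "b", regardless of hash order.
    assert!(snap.find('a').unwrap() < snap.find('b').unwrap());
}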
@@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds {
 }

 impl AvailableDocumentsIds {
-    pub fn from_documents_ids(
-        docids: &RoaringBitmap,
-        soft_deleted_docids: &RoaringBitmap,
-    ) -> AvailableDocumentsIds {
-        let used_docids = docids | soft_deleted_docids;
-
-        match used_docids.max() {
+    pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
+        match docids.max() {
             Some(last_id) => {
                 let mut available = RoaringBitmap::from_iter(0..last_id);
-                available -= used_docids;
+                available -= docids;

                 let iter = match last_id.checked_add(1) {
                     Some(id) => id..=u32::max_value(),
@@ -50,7 +45,7 @@ mod tests {
     #[test]
     fn empty() {
         let base = RoaringBitmap::new();
-        let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
         let right = 0..=u32::max_value();
         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
     }
@@ -63,28 +58,8 @@ mod tests {
         base.insert(100);
         base.insert(405);

-        let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
         let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
     }
-
-    #[test]
-    fn soft_deleted() {
-        let mut base = RoaringBitmap::new();
-        base.insert(0);
-        base.insert(10);
-        base.insert(100);
-        base.insert(405);
-
-        let mut soft_deleted = RoaringBitmap::new();
-        soft_deleted.insert(1);
-        soft_deleted.insert(11);
-        soft_deleted.insert(101);
-        soft_deleted.insert(406);
-
-        let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted);
-        let right =
-            (0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n));
-        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
-    }
 }
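With soft deletion gone, the available-ids iterator is derived from the live document ids alone: every id below the current maximum that is not in use, then the infinite tail above it. A small usage sketch of that behavior, computing the holes directly with `roaring` the way `from_documents_ids` does:

use roaring::RoaringBitmap;

fn main() {
    // Ids 0, 10 and 100 are taken; everything else is available.
    let docids = RoaringBitmap::from_iter([0u32, 10, 100]);

    // Holes below the maximum id, mirroring the updated constructor.
    let mut holes = RoaringBitmap::from_iter(0..docids.max().unwrap());
    holes -= &docids;

    let first_free: Vec<u32> = holes.iter().take(4).collect();
    assert_eq!(first_free, vec![1, 2, 3, 4]);
    // Once the holes run out, allocation continues at max + 1 (101, 102, ...).
}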
@@ -1,8 +1,7 @@
 use roaring::RoaringBitmap;
 use time::OffsetDateTime;

-use crate::facet::FacetType;
-use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
+use crate::{FieldDistribution, Index, Result};

 pub struct ClearDocuments<'t, 'u, 'i> {
     wtxn: &'t mut heed::RwTxn<'i, 'u>,
@@ -21,13 +20,12 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         let Index {
             env: _env,
             main: _main,
+            external_documents_ids,
             word_docids,
             exact_word_docids,
             word_prefix_docids,
             exact_word_prefix_docids,
             word_pair_proximity_docids,
-            word_prefix_pair_proximity_docids,
-            prefix_word_pair_proximity_docids,
             word_position_docids,
             word_fid_docids,
             field_id_word_count_docids,
@@ -51,43 +49,23 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {

         // We retrieve the number of documents ids that we are deleting.
         let number_of_documents = self.index.number_of_documents(self.wtxn)?;
-        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

         // We clean some of the main engine datastructures.
         self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
         self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
-        self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
         self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
-        self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?;
         self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
         self.index.delete_geo_rtree(self.wtxn)?;
         self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
         self.index.delete_vector_hnsw(self.wtxn)?;

-        // We clean all the faceted documents ids.
-        for field_id in faceted_fields {
-            self.index.put_faceted_documents_ids(
-                self.wtxn,
-                field_id,
-                FacetType::Number,
-                &empty_roaring,
-            )?;
-            self.index.put_faceted_documents_ids(
-                self.wtxn,
-                field_id,
-                FacetType::String,
-                &empty_roaring,
-            )?;
-        }
-
         // Clear the other databases.
+        external_documents_ids.clear(self.wtxn)?;
         word_docids.clear(self.wtxn)?;
         exact_word_docids.clear(self.wtxn)?;
         word_prefix_docids.clear(self.wtxn)?;
         exact_word_prefix_docids.clear(self.wtxn)?;
         word_pair_proximity_docids.clear(self.wtxn)?;
-        word_prefix_pair_proximity_docids.clear(self.wtxn)?;
-        prefix_word_pair_proximity_docids.clear(self.wtxn)?;
         word_position_docids.clear(self.wtxn)?;
         word_fid_docids.clear(self.wtxn)?;
         field_id_word_count_docids.clear(self.wtxn)?;
@@ -140,7 +118,7 @@ mod tests {

         assert!(index.words_fst(&rtxn).unwrap().is_empty());
         assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
-        assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
+        assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
         assert!(index.documents_ids(&rtxn).unwrap().is_empty());
         assert!(index.field_distribution(&rtxn).unwrap().is_empty());
         assert!(index.geo_rtree(&rtxn).unwrap().is_none());
@@ -150,7 +128,6 @@ mod tests {
         assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
         assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
         assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
-        assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap());
         assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
         assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
         assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
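`ClearDocuments` destructures the whole `Index` struct field by field before clearing, so adding a new database to `Index` is a compile error here until this method handles it. A reduced sketch of that design choice (the two-field struct is purely illustrative):

// Minimal stand-in for the milli `Index`; the real struct holds heed databases.
struct Index {
    word_docids: Vec<u32>,
    word_position_docids: Vec<u32>,
}

impl Index {
    fn clear(&mut self) {
        // Exhaustive destructuring: if a new database field is added to
        // `Index`, this pattern stops compiling until it is listed here.
        let Index { word_docids, word_position_docids } = self;
        word_docids.clear();
        word_position_docids.clear();
    }
}

fn main() {
    let mut index = Index { word_docids: vec![1, 2], word_position_docids: vec![3] };
    index.clear();
    assert!(index.word_docids.is_empty() && index.word_position_docids.is_empty());
}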