Mirror of https://github.com/meilisearch/meilisearch.git
synced 2025-12-10 22:55:43 +00:00

Compare commits: panic-repo...prototype-

2 commits:

- 42bbfebf70
- 5637978fe4
.github/ISSUE_TEMPLATE/sprint_issue.md (vendored, 8 changes)

```diff
@@ -7,17 +7,19 @@ assignees: ''
 
 ---
 
-Related product team resources: [PRD]() (_internal only_)
+Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
 Related product discussion:
+Related spec: WIP
 
 ## Motivation
 
-<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
+<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
 
 ## Usage
 
-<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
+<!---Write a quick description of the usage if the usage has already been defined-->
+
+Refer to the final spec to know the details and the final decisions about the usage.
 
 ## TODO
```
.github/workflows/benchmarks-manual.yml (vendored, 2 changes)

```diff
@@ -74,4 +74,4 @@ jobs:
           echo "${{ steps.file.outputs.basename }}.json has just been pushed."
           echo 'How to compare this benchmark with another one?'
           echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
-          echo " - Run the following command: ./benchmaks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
+          echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
```
.github/workflows/benchmarks-pr.yml (vendored, 98 changes: file removed)

```diff
@@ -1,98 +0,0 @@
-name: Benchmarks (PR)
-on: issue_comment
-permissions:
-  issues: write
-
-env:
-  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
-
-jobs:
-  run-benchmarks-on-comment:
-    if: startsWith(github.event.comment.body, '/benchmark')
-    name: Run and upload benchmarks
-    runs-on: benchmarks
-    timeout-minutes: 4320 # 72h
-    steps:
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-
-      - name: Check for Command
-        id: command
-        uses: xt0rted/slash-command-action@v2
-        with:
-          command: benchmark
-          reaction-type: "eyes"
-          repo-token: ${{ env.GH_TOKEN }}
-
-      - uses: xt0rted/pull-request-comment-branch@v2
-        id: comment-branch
-        with:
-          repo_token: ${{ env.GH_TOKEN }}
-
-      - uses: actions/checkout@v3
-        if: success()
-        with:
-          fetch-depth: 0 # fetch full history to be able to get main commit sha
-          ref: ${{ steps.comment-branch.outputs.head_ref }}
-
-      # Set variables
-      - name: Set current branch name
-        shell: bash
-        run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
-        id: current_branch
-      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
-        shell: bash
-        run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
-        id: normalized_current_branch
-      - name: Set shorter commit SHA
-        shell: bash
-        run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
-        id: commit_sha
-      - name: Set file basename with format "dataset_branch_commitSHA"
-        shell: bash
-        run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
-        id: file
-
-      # Run benchmarks
-      - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
-        run: |
-          cd benchmarks
-          cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
-
-      # Generate critcmp files
-      - name: Install critcmp
-        uses: taiki-e/install-action@v2
-        with:
-          tool: critcmp
-      - name: Export cripcmp file
-        run: |
-          critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
-
-      # Upload benchmarks
-      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
-        uses: BetaHuhn/do-spaces-action@v2
-        with:
-          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
-          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
-          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
-          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
-          source: ${{ steps.file.outputs.basename }}.json
-          out_dir: critcmp_results
-
-      # Compute the diff of the benchmarks and send a message on the GitHub PR
-      - name: Compute and send a message in the PR
-        env:
-          GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
-        run: |
-          set -x
-          export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
-          export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
-          export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
-          echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
-          echo '```' >> body.txt
-          ./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
-          echo '```' >> body.txt
-          gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt
```
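For context on how this removed workflow was driven: a maintainer commented `/benchmark <dataset>` on a pull request (for example `/benchmark search_songs`, where the dataset name is only illustrative), the slash-command action exposed the dataset as `command-arguments`, `cargo bench` saved a baseline named after the dataset, branch, and short commit SHA, and the critcmp comparison against the matching `main` baseline was posted back on the PR as a comment.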
.github/workflows/publish-apt-brew-pkg.yml (vendored, 2 changes)

```diff
@@ -50,7 +50,7 @@ jobs:
     needs: check-version
     steps:
       - name: Create PR to Homebrew
-        uses: mislav/bump-homebrew-formula-action@v3
+        uses: mislav/bump-homebrew-formula-action@v2
        with:
          formula-name: meilisearch
          formula-path: Formula/m/meilisearch.rb
```
.github/workflows/publish-docker-images.yml (vendored, 10 changes)

```diff
@@ -57,20 +57,20 @@ jobs:
           echo "date=$commit_date" >> $GITHUB_OUTPUT
 
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+        uses: docker/setup-qemu-action@v2
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v2
 
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@v2
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v4
         with:
           images: getmeili/meilisearch
           # Prevent `latest` to be updated for each new tag pushed.
@@ -83,7 +83,7 @@ jobs:
             type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
 
       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v4
         with:
           push: true
           platforms: linux/amd64,linux/arm64
```
.github/workflows/sdks-tests.yml (vendored, 4 changes)

```diff
@@ -160,7 +160,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js
       - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v3
         with:
           cache: 'yarn'
       - name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js-plugins
       - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v3
         with:
           cache: yarn
       - name: Install dependencies
```
.github/workflows/test-suite.yml (vendored, 10 changes)

```diff
@@ -43,7 +43,7 @@ jobs:
           toolchain: nightly
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.6.2
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -65,7 +65,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.6.2
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -149,7 +149,7 @@ jobs:
           toolchain: stable
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.6.2
       - name: Run tests in debug
         uses: actions-rs/cargo@v1
         with:
@@ -168,7 +168,7 @@ jobs:
           override: true
           components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.6.2
       - name: Run cargo clippy
         uses: actions-rs/cargo@v1
         with:
@@ -187,7 +187,7 @@ jobs:
           override: true
           components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.6.2
       - name: Run cargo fmt
         # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
         # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
```
Cargo.lock (generated, 72 changes)

```diff
@@ -231,9 +231,9 @@ dependencies = [
 
 [[package]]
 name = "addr2line"
-version = "0.21.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
 dependencies = [
  "gimli",
 ]
@@ -435,9 +435,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
 [[package]]
 name = "backtrace"
-version = "0.3.69"
+version = "0.3.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
+checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
 dependencies = [
  "addr2line",
  "cc",
@@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 
 [[package]]
 name = "benchmarks"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -1206,7 +1206,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1417,7 +1417,7 @@ dependencies = [
 
 [[package]]
 name = "file-store"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "faux",
  "tempfile",
@@ -1439,7 +1439,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "insta",
  "nom",
@@ -1459,7 +1459,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1577,7 +1577,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "arbitrary",
  "clap",
@@ -1638,9 +1638,9 @@ dependencies = [
 
 [[package]]
 name = "gimli"
-version = "0.28.0"
+version = "0.27.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
+checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
 
 [[package]]
 name = "git2"
@@ -1891,10 +1891,9 @@ dependencies = [
 
 [[package]]
 name = "index-scheduler"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "anyhow",
- "backtrace",
  "big_s",
  "bincode",
  "crossbeam",
@@ -2089,7 +2088,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2501,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "insta",
  "md5",
@@ -2510,7 +2509,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -2565,6 +2564,7 @@ dependencies = [
  "platform-dirs",
  "prometheus",
  "puffin",
+ "puffin_http",
  "rand",
  "rayon",
  "regex",
@@ -2600,7 +2600,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "base64 0.21.2",
  "enum-iterator",
@@ -2619,7 +2619,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -2673,7 +2673,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "big_s",
  "bimap",
@@ -2857,9 +2857,9 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.32.1"
+version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
+checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
 dependencies = [
  "memchr",
 ]
@@ -2995,7 +2995,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.4.1"
+version = "1.4.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -3193,7 +3193,7 @@ dependencies = [
  "byteorder",
  "hex",
  "lazy_static",
- "rustix 0.36.16",
+ "rustix 0.36.15",
 ]
 
 [[package]]
@@ -3236,6 +3236,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "puffin_http"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
+dependencies = [
+ "anyhow",
+ "crossbeam-channel",
+ "log",
+ "puffin",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.32"
@@ -3466,9 +3478,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.36.16"
+version = "0.36.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab"
+checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941"
 dependencies = [
  "bitflags 1.3.2",
  "errno",
@@ -3641,9 +3653,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.108"
+version = "1.0.104"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
+checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
 dependencies = [
  "indexmap 2.0.0",
  "itoa",
@@ -4431,9 +4443,9 @@ dependencies = [
 
 [[package]]
 name = "webpki"
-version = "0.22.2"
+version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
+checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e"
 dependencies = [
  "ring",
  "untrusted",
```
Cargo.toml (file header lost in extraction; identified from the hunk content)

```diff
@@ -18,7 +18,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.4.1"
+version = "1.4.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
@@ -28,7 +28,6 @@ license = "MIT"
 
 [profile.release]
 codegen-units = 1
-debug = true
 
 [profile.dev.package.flate2]
 opt-level = 3
```
PROFILING.md (file header lost in extraction; identified from the hunk content)

```diff
@@ -1,14 +1,14 @@
 # Profiling Meilisearch
 
-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
 
 
 ## Profiling the Indexing Process
 
-When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
 
-[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
+Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
 
 Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
```
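The `+` side's setup can be reproduced outside Meilisearch. Below is a minimal standalone sketch of the same wiring, assuming the `anyhow` and `puffin` crates plus `puffin_http = "0.13"` (the version pinned in the Cargo.lock hunk above); the work function is only a stand-in for `IndexScheduler::tick`.

```rust
fn main() -> anyhow::Result<()> {
    // Serve profiling data on the default address mentioned above, so that
    // puffin_viewer can connect to it while this process runs.
    let _server = puffin_http::Server::new("0.0.0.0:8585")?;
    puffin::set_scopes_on(true); // scopes are disabled by default

    loop {
        // Start one profiling "frame" per iteration, mirroring one scheduler tick.
        puffin::GlobalProfiler::lock().new_frame();
        do_some_indexing_work();
    }
}

fn do_some_indexing_work() {
    // Records this function as a named scope inside the current frame.
    puffin::profile_function!();
    std::thread::sleep(std::time::Duration::from_millis(100));
}
```

Pointing `puffin_viewer` at `127.0.0.1:8585` should then show one flamegraph per frame.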
(file header lost in extraction: hunks from a test module of the dump crate)

```diff
@@ -526,12 +526,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());
 
         // products
-        insta::assert_json_snapshot!(products.metadata(), @r###"
+        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "2022-10-09T20:27:22.688964637Z",
-          "updatedAt": "2022-10-09T20:27:23.951017769Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -541,12 +541,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
 
         // movies
-        insta::assert_json_snapshot!(movies.metadata(), @r###"
+        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "2022-10-09T20:27:22.197788495Z",
-          "updatedAt": "2022-10-09T20:28:01.93111053Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -571,12 +571,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
 
         // spells
-        insta::assert_json_snapshot!(spells.metadata(), @r###"
+        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "2022-10-09T20:27:24.242683494Z",
-          "updatedAt": "2022-10-09T20:27:24.312809641Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -617,12 +617,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());
 
         // products
-        insta::assert_json_snapshot!(products.metadata(), @r###"
+        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "2023-01-30T16:25:56.595257Z",
-          "updatedAt": "2023-01-30T16:25:58.70348Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -632,12 +632,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
 
         // movies
-        insta::assert_json_snapshot!(movies.metadata(), @r###"
+        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "2023-01-30T16:25:56.192178Z",
-          "updatedAt": "2023-01-30T16:25:56.455714Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -647,12 +647,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
 
         // spells
-        insta::assert_json_snapshot!(spells.metadata(), @r###"
+        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "2023-01-30T16:25:58.876405Z",
-          "updatedAt": "2023-01-30T16:25:59.079906Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
        }
         "###);
```
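The `+` side leans on insta's inline redactions because the prototype regenerates `createdAt`/`updatedAt` at dump-read time (see the `OffsetDateTime::now_utc()` fallback in the V2 reader below), so the raw values differ on every run. A self-contained illustration of the selector syntax, assuming the `insta` and `serde_json` crates (the values here are made up, and the exact inline snapshot text would normally come from `cargo insta review`):

```rust
#[test]
fn redaction_example() {
    let metadata = serde_json::json!({
        "createdAt": "2023-01-30T16:25:56.595257Z",
        "uid": "products",
    });
    // The `.createdAt` selector swaps the field's value for the literal
    // "[now]" before the snapshot comparison, so a timestamp that changes
    // on every run no longer invalidates the stored snapshot.
    insta::assert_json_snapshot!(metadata, { ".createdAt" => "[now]" }, @r###"
    {
      "createdAt": "[now]",
      "uid": "products"
    }
    "###);
}
```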
dump/src/reader/v2/mod.rs (file header lost in extraction; identified from the hunk content)

```diff
@@ -46,7 +46,6 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;
 
 pub type Task = updates::UpdateEntry;
-pub type Kind = updates::UpdateMeta;
 
 // everything related to the errors
 pub type ResponseError = errors::ResponseError;
@@ -108,11 +107,8 @@ impl V2Reader {
     pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
         Ok(self.index_uuid.iter().map(|index| -> Result<_> {
             V2IndexReader::new(
+                index.uid.clone(),
                 &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
-                index,
-                BufReader::new(
-                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
-                ),
             )
         }))
     }
@@ -147,41 +143,16 @@ pub struct V2IndexReader {
 }
 
 impl V2IndexReader {
-    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
+    pub fn new(name: String, path: &Path) -> Result<Self> {
         let meta = File::open(path.join("meta.json"))?;
         let meta: DumpMeta = serde_json::from_reader(meta)?;
 
-        let mut created_at = None;
-        let mut updated_at = None;
-
-        for line in tasks.lines() {
-            let task: Task = serde_json::from_str(&line?)?;
-            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
-                continue;
-            }
-
-            let new_created_at = match task.update.meta() {
-                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
-                _ => None,
-            };
-            let new_updated_at = task.update.finished_at();
-
-            if created_at.is_none() || created_at > new_created_at {
-                created_at = new_created_at;
-            }
-
-            if updated_at.is_none() || updated_at < new_updated_at {
-                updated_at = new_updated_at;
-            }
-        }
-
-        let current_time = OffsetDateTime::now_utc();
-
         let metadata = IndexMetadata {
-            uid: index_uuid.uid.clone(),
+            uid: name,
             primary_key: meta.primary_key,
-            created_at: created_at.unwrap_or(current_time),
-            updated_at: updated_at.unwrap_or(current_time),
+            // FIXME: Iterate over the whole task queue to find the creation and last update date.
+            created_at: OffsetDateTime::now_utc(),
+            updated_at: OffsetDateTime::now_utc(),
         };
 
         let ret = V2IndexReader {
@@ -277,12 +248,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());
 
         // products
-        insta::assert_json_snapshot!(products.metadata(), @r###"
+        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "2022-10-09T20:27:22.688964637Z",
-          "updatedAt": "2022-10-09T20:27:23.951017769Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -292,12 +263,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
 
         // movies
-        insta::assert_json_snapshot!(movies.metadata(), @r###"
+        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "2022-10-09T20:27:22.197788495Z",
-          "updatedAt": "2022-10-09T20:28:01.93111053Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -322,12 +293,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
 
         // spells
-        insta::assert_json_snapshot!(spells.metadata(), @r###"
+        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "2022-10-09T20:27:24.242683494Z",
-          "updatedAt": "2022-10-09T20:27:24.312809641Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -369,12 +340,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());
 
         // products
-        insta::assert_json_snapshot!(products.metadata(), @r###"
+        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "2023-01-30T16:25:56.595257Z",
-          "updatedAt": "2023-01-30T16:25:58.70348Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -384,12 +355,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
 
         // movies
-        insta::assert_json_snapshot!(movies.metadata(), @r###"
+        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "2023-01-30T16:25:56.192178Z",
-          "updatedAt": "2023-01-30T16:25:56.455714Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
@@ -399,12 +370,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
 
         // spells
-        insta::assert_json_snapshot!(spells.metadata(), @r###"
+        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "2023-01-30T16:25:58.876405Z",
-          "updatedAt": "2023-01-30T16:25:59.079906Z"
+          "createdAt": "[now]",
+          "updatedAt": "[now]"
         }
         "###);
```
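On the `-` side above, `createdAt` is the earliest `finished_at` among tasks that added documents or changed settings, `updatedAt` is the latest `finished_at` of any finished task, and `now_utc()` is only a fallback. Reduced to plain data, the fold looks like this (a simplified sketch assuming the `time` crate, with each task reduced to a `(finished_at, modifies_index)` pair rather than the real `UpdateEntry`):

```rust
use time::OffsetDateTime;

fn recover_timestamps(
    tasks: &[(OffsetDateTime, bool)],
) -> (Option<OffsetDateTime>, Option<OffsetDateTime>) {
    let mut created_at = None;
    let mut updated_at = None;
    for &(finished_at, modifies_index) in tasks {
        // createdAt: the earliest task that actually touched documents or settings.
        if modifies_index && (created_at.is_none() || created_at > Some(finished_at)) {
            created_at = Some(finished_at);
        }
        // updatedAt: the latest finished task of any kind.
        if updated_at.is_none() || updated_at < Some(finished_at) {
            updated_at = Some(finished_at);
        }
    }
    (created_at, updated_at)
}
```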
dump/src/reader/v2/updates.rs (file header lost in extraction; identified from the hunk content)

```diff
@@ -227,14 +227,4 @@ impl UpdateStatus {
             _ => None,
         }
     }
-
-    pub fn finished_at(&self) -> Option<OffsetDateTime> {
-        match self {
-            UpdateStatus::Processing(_) => None,
-            UpdateStatus::Enqueued(_) => None,
-            UpdateStatus::Processed(u) => Some(u.processed_at),
-            UpdateStatus::Aborted(_) => None,
-            UpdateStatus::Failed(u) => Some(u.failed_at),
-        }
-    }
 }
```
index-scheduler/Cargo.toml

```diff
@@ -12,7 +12,6 @@ license.workspace = true
 
 [dependencies]
 anyhow = "1.0.70"
-backtrace = "0.3.69"
 bincode = "1.3.3"
 csv = "1.2.1"
 derive_builder = "0.12.0"
```
index-scheduler/src/batch.rs

```diff
@@ -19,7 +19,6 @@ one indexing operation.
 
 use std::collections::{BTreeSet, HashSet};
 use std::ffi::OsStr;
-use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;
 
@@ -200,29 +199,6 @@ impl Batch {
     }
 }
 
-impl fmt::Display for Batch {
-    /// A text used when we debug the profiling reports.
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let index_uid = self.index_uid();
-        let tasks = self.ids();
-        match self {
-            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
-            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
-            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
-            Batch::Dump(_) => f.write_str("Dump")?,
-            Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
-            Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
-            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
-            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
-            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
-        };
-        match index_uid {
-            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
-            None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
-        }
-    }
-}
-
 impl IndexOperation {
     pub fn index_uid(&self) -> &str {
         match self {
@@ -237,30 +213,6 @@ impl IndexOperation {
     }
 }
 
-impl fmt::Display for IndexOperation {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            IndexOperation::DocumentOperation { .. } => {
-                f.write_str("IndexOperation::DocumentOperation")
-            }
-            IndexOperation::DocumentDeletion { .. } => {
-                f.write_str("IndexOperation::DocumentDeletion")
-            }
-            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
-                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
-            }
-            IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
-            IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
-            IndexOperation::DocumentClearAndSetting { .. } => {
-                f.write_str("IndexOperation::DocumentClearAndSetting")
-            }
-            IndexOperation::SettingsAndDocumentOperation { .. } => {
-                f.write_str("IndexOperation::SettingsAndDocumentOperation")
-            }
-        }
-    }
-}
-
 impl IndexScheduler {
     /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
     ///
@@ -629,7 +581,7 @@ impl IndexScheduler {
             self.breakpoint(crate::Breakpoint::InsideProcessBatch);
         }
 
-        puffin::profile_function!(batch.to_string());
+        puffin::profile_function!(format!("{:?}", batch));
 
         match batch {
             Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@@ -825,10 +777,6 @@ impl IndexScheduler {
                 // 2. dump the tasks
                 let mut dump_tasks = dump.create_tasks_queue()?;
                 for ret in self.all_tasks.iter(&rtxn)? {
-                    if self.must_stop_processing.get() {
-                        return Err(Error::AbortedTask);
-                    }
-
                     let (_, mut t) = ret?;
                     let status = t.status;
                     let content_file = t.content_uuid();
@@ -849,9 +797,6 @@ impl IndexScheduler {
 
                     // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
                     if let Some(content_file) = content_file {
-                        if self.must_stop_processing.get() {
-                            return Err(Error::AbortedTask);
-                        }
                         if status == Status::Enqueued {
                             let content_file = self.file_store.get_update(content_file)?;
 
@@ -891,9 +836,6 @@ impl IndexScheduler {
 
                     // 3.1. Dump the documents
                     for ret in index.all_documents(&rtxn)? {
-                        if self.must_stop_processing.get() {
-                            return Err(Error::AbortedTask);
-                        }
                         let (_id, doc) = ret?;
                         let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
                         index_dumper.push_document(&document)?;
@@ -906,16 +848,13 @@ impl IndexScheduler {
                 })?;
 
                 // 4. Dump experimental feature settings
-                let features = self.features().runtime_features();
+                let features = self.features()?.runtime_features();
                 dump.create_experimental_features(features)?;
 
                 let dump_uid = started_at.format(format_description!(
                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
                 )).unwrap();
 
-                if self.must_stop_processing.get() {
-                    return Err(Error::AbortedTask);
-                }
                 let path = self.dumps_path.join(format!("{}.dump", dump_uid));
                 let file = File::create(path)?;
                 dump.persist_to(BufWriter::new(file))?;
```
index-scheduler/src/error.rs

```diff
@@ -108,8 +108,6 @@ pub enum Error {
     TaskDeletionWithEmptyQuery,
     #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
     TaskCancelationWithEmptyQuery,
-    #[error("Aborted task")]
-    AbortedTask,
 
     #[error(transparent)]
     Dump(#[from] dump::Error),
@@ -117,13 +115,8 @@ pub enum Error {
     Heed(#[from] heed::Error),
     #[error(transparent)]
     Milli(#[from] milli::Error),
-    #[error("An unexpected crash occurred when processing the task. {}", {
-        match .0 {
-            Some(report) => format!("Get /reports/{}", report),
-            None => "No report was saved.".into(),
-        }
-    })]
-    ProcessBatchPanicked(Option<uuid::Uuid>),
+    #[error("An unexpected crash occurred when processing the task.")]
+    ProcessBatchPanicked,
     #[error(transparent)]
     FileStore(#[from] file_store::Error),
     #[error(transparent)]
@@ -182,11 +175,10 @@ impl Error {
             | Error::TaskNotFound(_)
             | Error::TaskDeletionWithEmptyQuery
             | Error::TaskCancelationWithEmptyQuery
-            | Error::AbortedTask
             | Error::Dump(_)
             | Error::Heed(_)
             | Error::Milli(_)
-            | Error::ProcessBatchPanicked(_)
+            | Error::ProcessBatchPanicked
             | Error::FileStore(_)
             | Error::IoError(_)
             | Error::Persist(_)
@@ -229,7 +221,7 @@ impl ErrorCode for Error {
             Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
             Error::Dump(e) => e.error_code(),
             Error::Milli(e) => e.error_code(),
-            Error::ProcessBatchPanicked(_) => Code::Internal,
+            Error::ProcessBatchPanicked => Code::Internal,
             Error::Heed(e) => e.error_code(),
             Error::HeedTransaction(e) => e.error_code(),
             Error::FileStore(e) => e.error_code(),
@@ -244,9 +236,6 @@ impl ErrorCode for Error {
             Error::TaskDatabaseUpdate(_) => Code::Internal,
             Error::CreateBatch(_) => Code::Internal,
 
-            // This one should never be seen by the end user
-            Error::AbortedTask => Code::Internal,
-
             #[cfg(test)]
             Error::PlannedFailure => Code::Internal,
         }
```
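The variant removed on the `-` side uses a less common `thiserror` capability: a format argument may be a full expression over the variant's fields, with `.0` naming the first tuple field. A minimal sketch of that pattern, assuming the `thiserror` and `uuid` crates (the enum name here is made up):

```rust
use thiserror::Error;

#[derive(Debug, Error)]
enum SchedulerError {
    // The block expression is evaluated as the `{}` argument, so the message
    // can branch on whether a crash report was captured for the panic.
    #[error("An unexpected crash occurred when processing the task. {}", {
        match .0 {
            Some(report) => format!("Get /reports/{}", report),
            None => "No report was saved.".to_string(),
        }
    })]
    ProcessBatchPanicked(Option<uuid::Uuid>),
}

fn main() {
    let err = SchedulerError::ProcessBatchPanicked(None);
    assert_eq!(
        err.to_string(),
        "An unexpected crash occurred when processing the task. No report was saved."
    );
}
```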
index-scheduler/src/features.rs

```diff
@@ -1,8 +1,6 @@
-use std::sync::{Arc, RwLock};
-
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 
 use crate::error::FeatureNotEnabledError;
 use crate::Result;
@@ -11,19 +9,20 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
 
 #[derive(Clone)]
 pub(crate) struct FeatureData {
-    persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
-    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
+    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
+    instance: InstanceTogglableFeatures,
 }
 
 #[derive(Debug, Clone, Copy)]
 pub struct RoFeatures {
     runtime: RuntimeTogglableFeatures,
+    instance: InstanceTogglableFeatures,
 }
 
 impl RoFeatures {
-    fn new(data: &FeatureData) -> Self {
-        let runtime = data.runtime_features();
-        Self { runtime }
+    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
+        let runtime = data.runtime_features(txn)?;
+        Ok(Self { runtime, instance: data.instance })
     }
 
     pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@@ -44,13 +43,13 @@ impl RoFeatures {
     }
 
     pub fn check_metrics(&self) -> Result<()> {
-        if self.runtime.metrics {
+        if self.instance.metrics {
             Ok(())
         } else {
             Err(FeatureNotEnabledError {
                 disabled_action: "Getting metrics",
                 feature: "metrics",
-                issue_link: "https://github.com/meilisearch/product/discussions/625",
+                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
             }
             .into())
         }
@@ -68,36 +67,15 @@ impl RoFeatures {
             .into())
         }
     }
-
-    pub fn check_puffin(&self) -> Result<()> {
-        if self.runtime.export_puffin_reports {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action: "Outputting Puffin reports to disk",
-                feature: "export puffin reports",
-                issue_link: "https://github.com/meilisearch/product/discussions/693",
-            }
-            .into())
-        }
-    }
 }
 
 impl FeatureData {
     pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
         let mut wtxn = env.write_txn()?;
-        let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
+        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
         wtxn.commit()?;
 
-        let txn = env.read_txn()?;
-        let persisted_features: RuntimeTogglableFeatures =
-            runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
-        let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
-            metrics: instance_features.metrics || persisted_features.metrics,
-            ..persisted_features
-        }));
-
-        Ok(Self { persisted: runtime_features_db, runtime })
+        Ok(Self { runtime: runtime_features, instance: instance_features })
     }
 
     pub fn put_runtime_features(
@@ -105,25 +83,16 @@ impl FeatureData {
         mut wtxn: RwTxn,
         features: RuntimeTogglableFeatures,
     ) -> Result<()> {
-        self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
+        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
         wtxn.commit()?;
 
-        // safe to unwrap, the lock will only fail if:
-        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
-        // 2. there's a panic while the thread is held -> it is only used for an assignment here.
-        let mut toggled_features = self.runtime.write().unwrap();
-        *toggled_features = features;
         Ok(())
     }
 
-    fn runtime_features(&self) -> RuntimeTogglableFeatures {
-        // sound to unwrap, the lock will only fail if:
-        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
-        // 2. there's a panic while the thread is held -> it is only used for copying the data here
-        *self.runtime.read().unwrap()
+    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
+        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
     }
 
-    pub fn features(&self) -> RoFeatures {
-        RoFeatures::new(self)
+    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
+        RoFeatures::new(txn, self)
     }
 }
```
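The net effect of the `-` side is that feature flags live in an in-process cache seeded from the database once at startup, so readers never have to open a heed transaction (and `features()` stops returning a `Result`). A reduced sketch of that pattern with the persistence elided (simplified types, not the exact Meilisearch code):

```rust
use std::sync::{Arc, RwLock};

#[derive(Debug, Clone, Copy, Default)]
struct RuntimeTogglableFeatures {
    metrics: bool,
    export_puffin_reports: bool,
}

#[derive(Clone)]
struct FeatureData {
    // Shared, cheaply clonable cache; the LMDB database behind it is elided.
    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
}

impl FeatureData {
    // Reads are infallible copies out of the cache: no transaction needed.
    fn runtime_features(&self) -> RuntimeTogglableFeatures {
        *self.runtime.read().unwrap()
    }

    // A writer would persist to the database first, then refresh the cache
    // so that later readers observe the new flags.
    fn put_runtime_features(&self, features: RuntimeTogglableFeatures) {
        *self.runtime.write().unwrap() = features;
    }
}

fn main() {
    let data = FeatureData { runtime: Arc::new(RwLock::new(Default::default())) };
    data.put_runtime_features(RuntimeTogglableFeatures {
        metrics: true,
        export_puffin_reports: false,
    });
    assert!(data.runtime_features().metrics);
}
```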
index-scheduler/src/insta_snapshot.rs

```diff
@@ -30,7 +30,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         index_mapper,
         features: _,
         max_number_of_tasks: _,
-        puffin_frame: _,
         wake_up: _,
         dumps_path: _,
         snapshots_path: _,
@@ -39,7 +38,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
-        panic_reader: _,
     } = scheduler;
 
     let rtxn = env.read_txn().unwrap();
```
index-scheduler/src/lib.rs

```diff
@@ -26,7 +26,6 @@ mod index_mapper;
 #[cfg(test)]
 mod insta_snapshot;
 mod lru;
-mod panic_hook;
 mod utils;
 mod uuid_codec;
 
@@ -34,7 +33,6 @@ pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
 
 use std::collections::{BTreeMap, HashMap};
-use std::fs::File;
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@@ -54,9 +52,6 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
-use panic_hook::ReportReader;
-pub use panic_hook::{Panic, Report, ReportRegistry};
-use puffin::FrameView;
 use roaring::RoaringBitmap;
 use synchronoise::SignalEvent;
 use time::format_description::well_known::Rfc3339;
@@ -319,9 +314,6 @@ pub struct IndexScheduler {
     /// the finished tasks automatically.
     pub(crate) max_number_of_tasks: usize,
 
-    /// A frame to output the indexation profiling files to disk.
-    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
-
     /// The path used to create the dumps.
     pub(crate) dumps_path: PathBuf,
 
@@ -334,8 +326,6 @@ pub struct IndexScheduler {
     /// The path to the version file of Meilisearch.
     pub(crate) version_file_path: PathBuf,
 
-    pub(crate) panic_reader: ReportReader,
-
     // ================= test
     // The next entry is dedicated to the tests.
     /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -374,7 +364,6 @@ impl IndexScheduler {
             wake_up: self.wake_up.clone(),
             autobatching_enabled: self.autobatching_enabled,
             max_number_of_tasks: self.max_number_of_tasks,
-            puffin_frame: self.puffin_frame.clone(),
             snapshots_path: self.snapshots_path.clone(),
             dumps_path: self.dumps_path.clone(),
             auth_path: self.auth_path.clone(),
@@ -386,7 +375,6 @@ impl IndexScheduler {
             #[cfg(test)]
             run_loop_iteration: self.run_loop_iteration.clone(),
             features: self.features.clone(),
-            panic_reader: self.panic_reader.clone(),
         }
     }
 }
@@ -444,12 +432,6 @@ impl IndexScheduler {
         let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
         wtxn.commit()?;
 
-        const MAX_REPORT_COUNT: usize = 20;
-
-        let panic_reader = panic_hook::ReportReader::install_panic_hook(
-            std::num::NonZeroUsize::new(MAX_REPORT_COUNT).unwrap(),
-        );
-
         // allow unreachable_code to get rids of the warning in the case of a test build.
         let this = Self {
             must_stop_processing: MustStopProcessing::default(),
@@ -475,7 +457,6 @@ impl IndexScheduler {
             env,
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
-            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
             autobatching_enabled: options.autobatching_enabled,
             max_number_of_tasks: options.max_number_of_tasks,
             dumps_path: options.dumps_path,
@@ -490,7 +471,6 @@ impl IndexScheduler {
             #[cfg(test)]
             run_loop_iteration: Arc::new(RwLock::new(0)),
             features,
-            panic_reader,
         };
 
         this.run();
@@ -592,46 +572,17 @@ impl IndexScheduler {
                 run.wake_up.wait();
 
                 loop {
-                    let puffin_enabled = run.features().check_puffin().is_ok();
-                    puffin::set_scopes_on(puffin_enabled);
-                    puffin::GlobalProfiler::lock().new_frame();
-
                     match run.tick() {
                         Ok(TickOutcome::TickAgain(_)) => (),
                         Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
                         Err(e) => {
-                            log::error!("{e}");
+                            log::error!("{}", e);
                             // Wait one second when an irrecoverable error occurs.
                             if !e.is_recoverable() {
                                 std::thread::sleep(Duration::from_secs(1));
                             }
                         }
                     }
-
-                    // Let's write the previous frame to disk but only if
-                    // the user wanted to profile with puffin.
-                    if puffin_enabled {
-                        let mut frame_view = run.puffin_frame.lock();
-                        if !frame_view.is_empty() {
-                            let now = OffsetDateTime::now_utc();
-                            let mut file = match File::create(format!("{}.puffin", now)) {
-                                Ok(file) => file,
-                                Err(e) => {
-                                    log::error!("{e}");
-                                    continue;
-                                }
-                            };
-                            if let Err(e) = frame_view.save_to_writer(&mut file) {
-                                log::error!("{e}");
-                            }
-                            if let Err(e) = file.sync_all() {
-                                log::error!("{e}");
-                            }
-                            // We erase this frame view as it is no more useful. We want to
-                            // measure the new frames now that we exported the previous ones.
-                            *frame_view = FrameView::default();
-                        }
-                    }
                 }
             })
             .unwrap();
@@ -1111,6 +1062,8 @@ impl IndexScheduler {
             self.breakpoint(Breakpoint::Start);
         }
 
+        puffin::GlobalProfiler::lock().new_frame();
+
         self.cleanup_task_queue()?;
 
         let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1143,10 +1096,7 @@ impl IndexScheduler {
                 .name(String::from("batch-operation"))
                 .spawn(move || cloned_index_scheduler.process_batch(batch))
                 .unwrap();
-            self.panic_reader
-                .join_thread(handle)
-                .unwrap_or_else(|maybe_report| Err(Error::ProcessBatchPanicked(maybe_report)))
+            handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
         };
 
         #[cfg(test)]
@@ -1183,8 +1133,7 @@ impl IndexScheduler {
             // If we have an abortion error we must stop the tick here and re-schedule tasks.
             Err(Error::Milli(milli::Error::InternalError(
                 milli::InternalError::AbortedIndexation,
-            )))
-            | Err(Error::AbortedTask) => {
+            ))) => {
                 #[cfg(test)]
                 self.breakpoint(Breakpoint::AbortedIndexation);
                 wtxn.abort().map_err(Error::HeedTransaction)?;
@@ -1310,8 +1259,9 @@ impl IndexScheduler {
         Ok(IndexStats { is_indexing, inner_stats: index_stats })
     }
 
-    pub fn features(&self) -> RoFeatures {
-        self.features.features()
+    pub fn features(&self) -> Result<RoFeatures> {
+        let rtxn = self.read_txn()?;
+        self.features.features(rtxn)
     }
 
     pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@@ -1327,10 +1277,6 @@ impl IndexScheduler {
         }
     }
 
-    pub fn reports(&self) -> Arc<RwLock<ReportRegistry>> {
-        self.panic_reader.registry()
-    }
-
     /// Blocks the thread until the test handle asks to progress to/through this breakpoint.
     ///
     /// Two messages are sent through the channel for each breakpoint.
@@ -4344,26 +4290,4 @@ mod tests {
         }
         "###);
     }
-
-    #[test]
-    fn cancel_processing_dump() {
-        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
-
-        let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
-        let dump_cancellation = KindWithContent::TaskCancelation {
-            query: "cancel dump".to_owned(),
-            tasks: RoaringBitmap::from_iter([0]),
-        };
-        let _ = index_scheduler.register(dump_creation).unwrap();
-        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
-        handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
-
-        let _ = index_scheduler.register(dump_cancellation).unwrap();
-        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
-
-        snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
-
-        handle.advance_one_successful_batch();
-        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
-    }
 }
```
@@ -1,211 +0,0 @@
|
||||
//! Panic hook designed to fetch a panic from a subthread and recover it on join.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::panic::PanicInfo;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread::{JoinHandle, ThreadId};
|
||||
|
||||
use backtrace::Backtrace;
|
||||
|
||||
// Represents a panic in a shallowy structured fashion
|
||||
pub struct Panic {
|
||||
pub payload: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub thread_name: Option<String>,
|
||||
pub thread_id: ThreadId,
|
||||
pub backtrace: Backtrace,
|
||||
}
|
||||
|
||||
/// A panic enriched with a unique id
|
||||
#[derive(serde::Serialize)]
|
||||
pub struct Report {
|
||||
pub id: uuid::Uuid,
|
||||
#[serde(serialize_with = "serialize_panic")]
|
||||
pub panic: Panic,
|
||||
}
|
||||
|
||||
fn serialize_panic<S>(panic: &Panic, s: S) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
use serde::Serialize;
|
||||
|
||||
panic.to_json().serialize(s)
|
||||
}
|
||||
|
||||
impl Report {
|
||||
pub fn new(panic: Panic) -> Self {
|
||||
Self { id: uuid::Uuid::new_v4(), panic }
|
||||
}
|
||||
}
|
||||
|
||||
impl Panic {
|
||||
pub fn to_json(&self) -> serde_json::Value {
|
||||
json::panic_to_json(self)
|
||||
}
|
||||
}
|
||||
|
||||
mod json {
|
||||
use backtrace::{Backtrace, BacktraceFrame, BacktraceSymbol};
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use super::Panic;
|
||||
|
||||
fn symbol_to_json(symbol: &BacktraceSymbol) -> Value {
|
||||
let address = symbol.addr().map(|addr| format!("{:p}", addr));
|
||||
let column = symbol.colno();
|
||||
let line = symbol.lineno();
|
||||
let function = symbol.name().map(|name| name.to_string());
|
||||
let filename = symbol.filename();
|
||||
json!({
|
||||
"function": function,
|
||||
"filename": filename,
|
||||
"line": line,
|
||||
"column": column,
|
||||
"address": address,
|
||||
})
|
||||
}
|
||||
|
||||
fn frame_to_json(frame: &BacktraceFrame) -> Value {
    let symbols: Vec<_> = frame.symbols().iter().map(symbol_to_json).collect();
    match symbols.as_slice() {
        [] => {
            let address = format!("{:p}", frame.ip());
            json!({"address": address})
        }
        [symbol] => json!(symbol),
        symbols => json!(symbols),
    }
}

fn backtrace_to_json(backtrace: &Backtrace) -> Value {
    let frames: Vec<_> = backtrace.frames().iter().map(frame_to_json).collect();
    json!(frames)
}

pub fn panic_to_json(panic: &Panic) -> Value {
    let thread_id = format!("{:?}", panic.thread_id);
    serde_json::json!({
        "payload": panic.payload,
        "location": panic.location,
        "thread": {
            "id": thread_id,
            "name": panic.thread_name,
        },
        "backtrace": backtrace_to_json(&panic.backtrace),
    })
}
}

struct ReportWriter(Arc<RwLock<ReportRegistry>>);

/// A FIFO queue of reports.
pub struct ReportRegistry {
    reports: std::collections::VecDeque<Report>,
}

impl ReportRegistry {
    pub fn new(capacity: NonZeroUsize) -> Self {
        Self { reports: VecDeque::with_capacity(capacity.get()) }
    }

    pub fn push(&mut self, report: Report) -> Option<Report> {
        let popped = if self.reports.len() == self.reports.capacity() {
            self.reports.pop_back()
        } else {
            None
        };
        self.reports.push_front(report);
        popped
    }

    pub fn iter(&self) -> impl Iterator<Item = &Report> {
        self.reports.iter()
    }

    pub fn find(&self, report_id: uuid::Uuid) -> Option<&Report> {
        self.iter().find(|report| report.id == report_id)
    }
}

impl ReportWriter {
    #[track_caller]
    fn write_panic(&self, panic_info: &PanicInfo<'_>) {
        let payload = panic_info
            .payload()
            .downcast_ref::<&str>()
            .map(ToString::to_string)
            .or_else(|| panic_info.payload().downcast_ref::<String>().cloned());
        let location = panic_info.location().map(|loc| {
            format!(
                "{file}:{line}:{column}",
                file = loc.file(),
                line = loc.line(),
                column = loc.column()
            )
        });

        let thread_name = std::thread::current().name().map(ToString::to_string);
        let thread_id = std::thread::current().id();
        let backtrace = backtrace::Backtrace::new();

        let panic = Panic { payload, location, thread_name, thread_id, backtrace };

        let report = Report::new(panic);

        log::error!(
            "An unexpected panic occurred on thread {name} at {location}: {payload}. See report '{report}' for details.",
            payload = report.panic.payload.as_deref().unwrap_or("Box<dyn Any>"),
            name = report.panic.thread_name.as_deref().unwrap_or("<unnamed>"),
            location = report.panic.location.as_deref().unwrap_or("<unknown>"),
            report = report.id,
        );

        if let Ok(mut registry) = self.0.write() {
            if let Some(old_report) = registry.push(report) {
                log::trace!("Forgetting report {} to make space for new report.", old_report.id)
            }
        }
    }
}

/// Reads the reports written in case of a panic.
#[derive(Clone)]
pub struct ReportReader(Arc<RwLock<ReportRegistry>>);

impl ReportReader {
    /// Installs a new global panic hook, overriding any existing hook.
    ///
    /// The hook writes any incoming panic to a report.
    /// The reports can then be read by the returned [`ReportReader`].
    pub fn install_panic_hook(capacity: NonZeroUsize) -> Self {
        let registry = Arc::new(RwLock::new(ReportRegistry::new(capacity)));
        let reader = ReportReader(registry.clone());
        let writer = ReportWriter(registry.clone());

        std::panic::set_hook(Box::new(move |panic_info| writer.write_panic(panic_info)));
        reader
    }

    /// Joins the thread corresponding to the passed handle, recovering either its value
    /// or, in case the thread panicked, the id of the report corresponding to the panic.
    ///
    /// The id can be used to read the report from [`self.registry()`].
    pub fn join_thread<T>(&self, thread: JoinHandle<T>) -> Result<T, Option<uuid::Uuid>> {
        let thread_id = thread.thread().id();
        thread.join().map_err(|_e| {
            self.0
                .read()
                .unwrap()
                .iter()
                .find(|report| report.panic.thread_id == thread_id)
                .map(|report| report.id)
        })
    }

    /// Returns a registry that can be used to read the reports written during a panic.
    pub fn registry(&self) -> Arc<RwLock<ReportRegistry>> {
        self.0.clone()
    }
}
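For orientation, here is a minimal, self-contained sketch of the same hook-plus-registry pattern using only the standard library. The `Report`/`Panic` types from the code above are replaced by a plain `String`, and the capacity value is an assumption, not taken from the diff:

```rust
use std::collections::VecDeque;
use std::panic;
use std::sync::{Arc, RwLock};

fn main() {
    // Shared FIFO registry, newest report first, bounded like ReportRegistry::push.
    let registry: Arc<RwLock<VecDeque<String>>> = Arc::new(RwLock::new(VecDeque::new()));
    let capacity = 2; // assumed capacity; the real code takes a NonZeroUsize

    let writer = Arc::clone(&registry);
    panic::set_hook(Box::new(move |info| {
        if let Ok(mut reports) = writer.write() {
            if reports.len() == capacity {
                // Forget the oldest report to make room, as ReportRegistry::push does.
                reports.pop_back();
            }
            reports.push_front(info.to_string());
        }
    }));

    // A panicking thread is joined, and its report can be looked up afterwards.
    let handle = std::thread::spawn(|| panic!("boom"));
    let _ = handle.join();

    let reports = registry.read().unwrap();
    assert_eq!(reports.len(), 1);
    println!("latest report: {}", reports.front().unwrap());
}
```

The design choice mirrored here is that the hook never propagates errors: a poisoned lock is silently ignored, since the process is already unwinding when the hook runs.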
@@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:
1 [0,]

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -88,6 +88,7 @@ pub trait ErrorCode {
    }
}

#[allow(clippy::enum_variant_names)]
enum ErrorType {
    Internal,
    InvalidRequest,
@@ -297,7 +298,6 @@ MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY ;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
ReportNotFound , InvalidRequest , NOT_FOUND ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
UnretrievableDocument , Internal , BAD_REQUEST ;
@@ -5,8 +5,6 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
    pub score_details: bool,
    pub vector_store: bool,
    pub metrics: bool,
    pub export_puffin_reports: bool,
}

#[derive(Default, Debug, Clone, Copy)]
@@ -69,7 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@@ -134,6 +135,7 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
    "actix-web-static-files",
    "static-files",
@@ -51,8 +51,6 @@ pub enum MeilisearchHttpError {
    DocumentFormat(#[from] DocumentFormatError),
    #[error(transparent)]
    Join(#[from] JoinError),
    #[error("Report `{0}` not found. Either its id is incorrect, or it was deleted. To save on memory, only a limited number of reports are kept.")]
    ReportNotFound(uuid::Uuid),
}

impl ErrorCode for MeilisearchHttpError {
@@ -76,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
            MeilisearchHttpError::FileStore(_) => Code::Internal,
            MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
            MeilisearchHttpError::Join(_) => Code::Internal,
            MeilisearchHttpError::ReportNotFound(_) => Code::ReportNotFound,
        }
    }
}
@@ -114,7 +114,10 @@ pub fn create_app(
        .configure(routes::configure)
        .configure(|s| dashboard(s, enable_dashboard));

    let app = app.wrap(middleware::RouteMetrics);
    let app = app.wrap(actix_web::middleware::Condition::new(
        opt.experimental_enable_metrics,
        middleware::RouteMetrics,
    ));
    app.wrap(
        Cors::default()
            .send_wildcard()
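The new `create_app` wiring registers `RouteMetrics` behind `actix_web::middleware::Condition`, so the middleware only runs when the startup flag is set. A minimal, self-contained sketch of that pattern with a stock actix-web middleware; the flag name and port are illustrative assumptions:

```rust
use actix_web::middleware::{Condition, Logger};
use actix_web::{web, App, HttpResponse, HttpServer};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Stands in for a flag such as opt.experimental_enable_metrics.
    let enable_logging = true;

    HttpServer::new(move || {
        App::new()
            // The middleware is only registered when the flag is set at startup.
            .wrap(Condition::new(enable_logging, Logger::default()))
            .route("/", web::get().to(|| async { HttpResponse::Ok().body("ok") }))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```

`Condition` decides once, at app construction, which is why toggling the feature at runtime also required the in-handler check that the other side of this diff removes.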
@@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

    #[cfg(feature = "profile-with-puffin")]
    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
@@ -3,10 +3,8 @@
use std::future::{ready, Ready};

use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;

pub struct RouteMetrics;
@@ -49,27 +47,19 @@ where

    fn call(&self, req: ServiceRequest) -> Self::Future {
        let mut histogram_timer: Option<HistogramTimer> = None;

        // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
        // also, the tests will fail if this is not present.
        let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
        let features = index_scheduler.features();

        if features.check_metrics().is_ok() {
            let request_path = req.path();
            let is_registered_resource = req.resource_map().has_resource(request_path);
            if is_registered_resource {
                let request_method = req.method().to_string();
                histogram_timer = Some(
                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                        .with_label_values(&[&request_method, request_path])
                        .start_timer(),
                );
                crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                    .with_label_values(&[&request_method, request_path])
                    .inc();
            }
        };
        let request_path = req.path();
        let is_registered_resource = req.resource_map().has_resource(request_path);
        if is_registered_resource {
            let request_method = req.method().to_string();
            histogram_timer = Some(
                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                    .with_label_values(&[&request_method, request_path])
                    .start_timer(),
            );
            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                .with_label_values(&[&request_method, request_path])
                .inc();
        }

        let fut = self.service.call(req);
@@ -29,12 +29,12 @@ async fn get_features(
    >,
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
) -> HttpResponse {
    let features = index_scheduler.features();
) -> Result<HttpResponse, ResponseError> {
    let features = index_scheduler.features()?;

    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
    debug!("returns: {:?}", features.runtime_features());
    HttpResponse::Ok().json(features.runtime_features())
    Ok(HttpResponse::Ok().json(features.runtime_features()))
}
#[derive(Debug, Deserr)]
@@ -44,10 +44,6 @@ pub struct RuntimeTogglableFeatures {
    pub score_details: Option<bool>,
    #[deserr(default)]
    pub vector_store: Option<bool>,
    #[deserr(default)]
    pub metrics: Option<bool>,
    #[deserr(default)]
    pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@@ -59,36 +55,26 @@ async fn patch_features(
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let features = index_scheduler.features();
    let features = index_scheduler.features()?;

    let old_features = features.runtime_features();

    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
        score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
        export_puffin_reports: new_features
            .0
            .export_puffin_reports
            .unwrap_or(old_features.export_puffin_reports),
    };

    // explicitly destructure for analytics rather than using the `Serialize` implementation, because
    // it renames to camelCase, which we don't want for analytics.
    // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
    let meilisearch_types::features::RuntimeTogglableFeatures {
        score_details,
        vector_store,
        metrics,
        export_puffin_reports,
    } = new_features;
    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
        new_features;

    analytics.publish(
        "Experimental features Updated".to_string(),
        json!({
            "score_details": score_details,
            "vector_store": vector_store,
            "metrics": metrics,
            "export_puffin_reports": export_puffin_reports,
        }),
        Some(&req),
    );
@@ -68,7 +68,7 @@ pub async fn search(
    }

    let index = index_scheduler.index(&index_uid)?;
    let features = index_scheduler.features();
    let features = index_scheduler.features()?;
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name, features)
    })
@@ -157,7 +157,7 @@ pub async fn search_with_url_query(
    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
    let features = index_scheduler.features();
    let features = index_scheduler.features()?;
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
@@ -192,7 +192,7 @@ pub async fn search_with_post(

    let index = index_scheduler.index(&index_uid)?;

    let features = index_scheduler.features();
    let features = index_scheduler.features()?;
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
@@ -19,7 +19,7 @@ pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
    index_scheduler.features().check_metrics()?;
    index_scheduler.features()?.check_metrics()?;
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {
        let mut error = ResponseError::from(AuthenticationError::InvalidToken);
@@ -24,7 +24,6 @@ pub mod features;
pub mod indexes;
mod metrics;
mod multi_search;
mod reports;
mod snapshot;
mod swap_indexes;
pub mod tasks;
@@ -41,8 +40,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
        .service(web::scope("/multi-search").configure(multi_search::configure))
        .service(web::scope("/swap-indexes").configure(swap_indexes::configure))
        .service(web::scope("/metrics").configure(metrics::configure))
        .service(web::scope("/experimental-features").configure(features::configure))
        .service(web::scope("/reports").configure(reports::configure));
        .service(web::scope("/experimental-features").configure(features::configure));
}

#[derive(Debug, Serialize)]
@@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
    let queries = params.into_inner().queries;

    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
    let features = index_scheduler.features();
    let features = index_scheduler.features()?;

    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
@@ -1,39 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::{IndexScheduler, Report};
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;

use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::get().to(list_reports))).service(
        web::scope("/{report_uid}")
            .service(web::resource("").route(web::get().to(SeqHandler(get_report)))),
    );
}

pub async fn list_reports(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
    let reports = &index_scheduler.reports();
    let reports = &reports.read().unwrap();
    let reports: Vec<&Report> = reports.iter().collect();

    Ok(HttpResponse::Ok().json(reports))
}

pub async fn get_report(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
    report_id: web::Path<uuid::Uuid>,
) -> Result<HttpResponse, ResponseError> {
    let reports = &index_scheduler.reports();
    let reports = &reports.read().unwrap();
    let report = reports
        .find(*report_id)
        .ok_or(crate::error::MeilisearchHttpError::ReportNotFound(*report_id))?;

    Ok(HttpResponse::Ok().json(report))
}
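For context, a hedged sketch of how a client could exercise these two routes on the side of the compare where the file exists. The instance URL is an assumption, and `reqwest` (with its `blocking` and `json` features) plus `serde_json` are stand-in client crates, not dependencies of this repository:

```rust
use serde_json::Value;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let base = "http://localhost:7700"; // assumed local instance
    let client = reqwest::blocking::Client::new();

    // GET /reports returns the registry content, newest report first.
    let reports: Value = client.get(format!("{base}/reports")).send()?.json()?;

    // GET /reports/{report_uid} returns one report, or a `report_not_found` error.
    if let Some(id) = reports.as_array().and_then(|r| r.first()).and_then(|r| r["id"].as_str()) {
        let report: Value = client.get(format!("{base}/reports/{id}")).send()?.json()?;
        println!("{report:#}");
    }
    Ok(())
}
```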
@@ -2,12 +2,10 @@ use std::collections::{HashMap, HashSet};

use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime};

use crate::common::{default_settings, Server, Value};
use crate::common::{Server, Value};
use crate::json;

pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@@ -197,9 +195,7 @@ async fn access_authorized_master_key() {

#[actix_rt::test]
async fn access_authorized_restricted_index() {
    let dir = TempDir::new().unwrap();
    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
    let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
    let mut server = Server::new_auth().await;
    for ((method, route), actions) in AUTHORIZATIONS.iter() {
        for action in actions {
            // create a new API key letting only the needed action.
@@ -202,10 +202,6 @@ impl Server {
    pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
        self.service.patch("/experimental-features", value).await
    }

    pub async fn get_metrics(&self) -> (Value, StatusCode) {
        self.service.get("/metrics").await
    }
}

pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -225,7 +221,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
            skip_index_budget: true,
            ..Parser::parse_from(None as Option<&str>)
        },
        experimental_enable_metrics: false,
        experimental_enable_metrics: true,
        ..Parser::parse_from(None as Option<&str>)
    }
}
@@ -1,7 +1,4 @@
use meilisearch::Opt;
use tempfile::TempDir;

use crate::common::{default_settings, Server};
use crate::common::Server;
use crate::json;

/// Feature name to test against.
@@ -19,9 +16,7 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": false,
      "metrics": false,
      "exportPuffinReports": false
      "vectorStore": false
    }
    "###);
@@ -31,9 +26,7 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
      "vectorStore": true
    }
    "###);
@@ -43,9 +36,7 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
      "vectorStore": true
    }
    "###);
@@ -56,9 +47,7 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
      "vectorStore": true
    }
    "###);
@@ -69,73 +58,11 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": true,
      "metrics": false,
      "exportPuffinReports": false
      "vectorStore": true
    }
    "###);
}

#[actix_rt::test]
async fn experimental_feature_metrics() {
    // the instance flag for metrics enables metrics at startup
    let dir = TempDir::new().unwrap();
    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
    let server = Server::new_with_options(enable_metrics).await.unwrap();

    let (response, code) = server.get_features().await;

    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
      "vectorStore": false,
      "metrics": true,
      "exportPuffinReports": false
    }
    "###);

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");

    // metrics are not returned in json format
    // so the test server will return null
    meili_snap::snapshot!(response, @"null");

    // disabling metrics results in an invalid request
    let (response, code) = server.set_features(json!({"metrics": false})).await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response["metrics"], @"false");

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
      "code": "feature_not_enabled",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
    }
    "###);

    // enabling metrics via HTTP results in a valid request
    let (response, code) = server.set_features(json!({"metrics": true})).await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response["metrics"], @"true");

    let (response, code) = server.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response, @"null");

    // startup without the flag respects the persisted metrics value
    let disable_metrics =
        Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
    let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
    let (response, code) = server_no_flag.get_metrics().await;
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response, @"null");
}

#[actix_rt::test]
async fn errors() {
    let server = Server::new().await;
@@ -146,7 +73,7 @@ async fn errors() {
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
      "code": "bad_request",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#bad_request"
@@ -1,63 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;

use crate::common::{Server, Value};
use crate::json;

pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {"productId": 1, "shopId": 1},
        {"productId": 2, "shopId": 1},
        {"productId": 3, "shopId": 2},
        {"productId": 4, "shopId": 2},
        {"productId": 5, "shopId": 3},
        {"productId": 6, "shopId": 3},
        {"productId": 7, "shopId": 4},
        {"productId": 8, "shopId": 4},
        {"productId": 9, "shopId": 5},
        {"productId": 10, "shopId": 5}
    ])
});

pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";

/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

    fn get_hits(Value(response): Value) -> Vec<i64> {
        let hits_array = response["hits"].as_array().unwrap();
        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
    }

    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @"[1, 2]");

    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @"[3, 4]");

    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"1");
    snapshot!(format!("{:?}", hits), @"[5]");

    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
}
@@ -1,7 +1,6 @@
// This module contains all the tests concerning search. Each particular feature of the search
// should be tested in its own module to isolate tests and keep the tests readable.

mod distinct;
mod errors;
mod facet_search;
mod formatted;
@@ -817,7 +816,7 @@ async fn experimental_feature_score_details() {
      },
      "proximity": {
        "order": 2,
        "score": 0.75
        "score": 0.875
      },
      "attribute": {
        "order": 3,
@@ -1,5 +1,4 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};

use obkv::KvReader;
@@ -20,14 +19,14 @@ use crate::FieldId;
pub struct EnrichedDocumentsBatchReader<R> {
    documents: DocumentsBatchReader<R>,
    primary_key: String,
    external_ids: grenad::ReaderCursor<BufReader<File>>,
    external_ids: grenad::ReaderCursor<File>,
}

impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
    pub fn new(
        documents: DocumentsBatchReader<R>,
        primary_key: String,
        external_ids: grenad::Reader<BufReader<File>>,
        external_ids: grenad::Reader<File>,
    ) -> Result<Self, Error> {
        if documents.documents_count() as u64 == external_ids.len() {
            Ok(EnrichedDocumentsBatchReader {
@@ -76,7 +75,7 @@ pub struct EnrichedDocument<'a> {
pub struct EnrichedDocumentsBatchCursor<R> {
    documents: DocumentsBatchCursor<R>,
    primary_key: String,
    external_ids: grenad::ReaderCursor<BufReader<File>>,
    external_ids: grenad::ReaderCursor<File>,
}

impl<R> EnrichedDocumentsBatchCursor<R> {
@@ -2,7 +2,7 @@ use std::cmp;

use crate::{relative_from_absolute_position, Position};

pub const MAX_DISTANCE: u32 = 4;
pub const MAX_DISTANCE: u32 = 8;

pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs {
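Doubling `MAX_DISTANCE` widens the range of distinguishable word gaps, which is why the `rank`/`max_rank` values in the proximity snapshots later in this diff roughly double as well. A small sketch of how such a constant caps a pairwise distance; this is my own illustration of the idea, not the milli implementation:

```rust
const MAX_DISTANCE: u32 = 8;

/// Clamp a raw positional gap into the [1, MAX_DISTANCE] cost range,
/// so anything further apart than MAX_DISTANCE costs the same.
fn proximity_cost(lhs_pos: u32, rhs_pos: u32) -> u32 {
    let gap = lhs_pos.abs_diff(rhs_pos);
    gap.clamp(1, MAX_DISTANCE)
}

fn main() {
    assert_eq!(proximity_cost(3, 4), 1); // adjacent words: best possible cost
    assert_eq!(proximity_cost(0, 50), 8); // far apart: capped at MAX_DISTANCE
    println!("ok");
}
```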
@@ -46,27 +46,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    if let Some(distinct_fid) = distinct_fid {
        let mut excluded = RoaringBitmap::new();
        let mut results = vec![];
        let mut skip = 0;
        for docid in universe.iter() {
            if results.len() >= length {
            if results.len() >= from + length {
                break;
            }
            if excluded.contains(docid) {
                continue;
            }

            distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
            skip += 1;
            if skip <= from {
                continue;
            }

            results.push(docid);
        }

        let mut all_candidates = universe - excluded;
        all_candidates.extend(results.iter().copied());

        return Ok(BucketSortOutput {
            scores: vec![Default::default(); results.len()],
            docids: results,
|
||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec,
|
||||
RoaringBitmapLenCodec, SearchContext,
|
||||
};
|
||||
|
||||
/// A cache storing pointers to values in the LMDB databases.
|
||||
@@ -259,6 +260,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
@@ -278,6 +280,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<u64>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
@@ -291,12 +294,23 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_db_word_docids_len(&mut self, word: Interned<String>) -> Result<Option<u64>> {
|
||||
DatabaseCache::get_value::<_, _, RoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||
&mut self,
|
||||
word1: Interned<String>,
|
||||
prefix2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, prefix2),
|
||||
@@ -315,6 +329,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, left_prefix, right),
|
||||
|
||||
@@ -295,11 +295,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
                ranking_rules.push(Box::new(Typo::new(None)));
            }
            crate::Criterion::Proximity => {
                if proximity {
                    continue;
                }
                proximity = true;
                ranking_rules.push(Box::new(Proximity::new(None)));
                // if proximity {
                continue;
                // }
                // proximity = true;
                // ranking_rules.push(Box::new(Proximity::new(None)));
            }
            crate::Criterion::Attribute => {
                if attribute {
@@ -265,11 +265,11 @@ pub fn partially_initialized_term_from_word(
}

fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
    if let Some((l, r)) = split_best_frequency(ctx, word)? {
        Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
    } else {
        Ok(None)
    }
    // if let Some((l, r)) = split_best_frequency(ctx, word)? {
    //     Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
    // } else {
    Ok(None)
    // }
}

impl Interned<QueryTerm> {
@@ -416,11 +416,20 @@ fn split_best_frequency(
        let left = ctx.word_interner.insert(left.to_owned());
        let right = ctx.word_interner.insert(right.to_owned());

        if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
        if let (Some(l_freq), Some(r_freq)) =
            (ctx.get_db_word_docids_len(left)?, ctx.get_db_word_docids_len(right)?)
        {
            let frequency = l_freq.min(r_freq);
            if best.map_or(true, |(old, _, _)| frequency > old) {
                best = Some((frequency, left, right));
            }
        }

        // if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
        //     if best.map_or(true, |(old, _, _)| frequency > old) {
        //         best = Some((frequency, left, right));
        //     }
        // }
    }

    Ok(best.map(|(_, left, right)| (left, right)))
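The new heuristic scores a candidate split by the rarer of the two halves (`l_freq.min(r_freq)`) instead of by how often the pair occurs one word apart. A toy version of the same idea over a plain word-count map; the data and helper are assumptions for illustration, not milli's interners or databases:

```rust
use std::collections::HashMap;

/// Pick the split of `word` whose less-frequent half is most frequent,
/// mirroring the `l_freq.min(r_freq)` scoring in split_best_frequency.
fn split_best_frequency(word: &str, counts: &HashMap<&str, u64>) -> Option<(String, String)> {
    let mut best: Option<(u64, String, String)> = None;
    // ASCII-only toy: split at every byte boundary.
    for i in 1..word.len() {
        let (left, right) = word.split_at(i);
        if let (Some(&l), Some(&r)) = (counts.get(left), counts.get(right)) {
            let frequency = l.min(r);
            if best.as_ref().map_or(true, |(old, _, _)| frequency > *old) {
                best = Some((frequency, left.to_string(), right.to_string()));
            }
        }
    }
    best.map(|(_, l, r)| (l, r))
}

fn main() {
    let counts =
        HashMap::from([("sun", 50), ("flower", 30), ("sunflower", 5), ("s", 1), ("unflower", 1)]);
    // "sun" + "flower" wins: min(50, 30) = 30 beats min(1, 1) = 1.
    assert_eq!(split_best_frequency("sunflower", &counts), Some(("sun".into(), "flower".into())));
    println!("ok");
}
```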
@@ -82,41 +82,41 @@ pub fn located_query_terms_from_tokens(
            position = position.wrapping_add(7);
        }

        phrase = 'phrase: {
            let phrase = phrase.take();
        // phrase = 'phrase: {
        //     let phrase = phrase.take();

            // If we have a hard separator inside a phrase, we immediately start a new phrase
            let phrase = if separator_kind == SeparatorKind::Hard {
                if let Some(phrase) = phrase {
                    if let Some(located_query_term) = phrase.build(ctx) {
                        located_terms.push(located_query_term)
                    }
                    Some(PhraseBuilder::empty())
                } else {
                    None
                }
            } else {
                phrase
            };
        //     // If we have a hard separator inside a phrase, we immediately start a new phrase
        //     let phrase = if separator_kind == SeparatorKind::Hard {
        //         if let Some(phrase) = phrase {
        //             if let Some(located_query_term) = phrase.build(ctx) {
        //                 located_terms.push(located_query_term)
        //             }
        //             Some(PhraseBuilder::empty())
        //         } else {
        //             None
        //         }
        //     } else {
        //         phrase
        //     };

            // We close and start a new phrase depending on the number of double quotes
            let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
            if quote_count == 0 {
                break 'phrase phrase;
            }
        //     // We close and start a new phrase depending on the number of double quotes
        //     let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
        //     if quote_count == 0 {
        //         break 'phrase phrase;
        //     }

            // Consume the closing quote and the phrase
            if let Some(phrase) = phrase {
                // Per the check above, quote_count > 0
                quote_count -= 1;
                if let Some(located_query_term) = phrase.build(ctx) {
                    located_terms.push(located_query_term)
                }
            }
        //     // Consume the closing quote and the phrase
        //     if let Some(phrase) = phrase {
        //         // Per the check above, quote_count > 0
        //         quote_count -= 1;
        //         if let Some(located_query_term) = phrase.build(ctx) {
        //             located_terms.push(located_query_term)
        //         }
        //     }

            // Start new phrase if the token ends with an opening quote
            (quote_count % 2 == 1).then_some(PhraseBuilder::empty())
        };
        //     // Start new phrase if the token ends with an opening quote
        //     (quote_count % 2 == 1).then_some(PhraseBuilder::empty())
        // };
        }
        _ => (),
    }
@@ -1,7 +1,6 @@
#![allow(clippy::too_many_arguments)]

use super::ProximityCondition;
use crate::proximity::MAX_DISTANCE;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@@ -36,7 +35,7 @@ pub fn build_edges(
    }

    let mut conditions = vec![];
    for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
    for cost in right_ngram_max..(7 + right_ngram_max) {
        conditions.push((
            cost as u32,
            conditions_interner.insert(ProximityCondition::Uninit {
@@ -48,7 +47,7 @@ pub fn build_edges(
    }

    conditions.push((
        ((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
        (7 + right_ngram_max) as u32,
        conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
    ));
@@ -273,7 +273,7 @@ fn test_proximity_simple() {
    s.terms_matching_strategy(TermsMatchingStrategy::All);
    s.query("the quick brown fox jumps over the lazy dog");
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
    [
@@ -282,11 +282,11 @@ fn test_proximity_simple() {
        "\"the quickbrown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the very lazy dog\"",
        "\"brown quick fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy. dog\"",
        "\"dog the quick brown fox jumps over the lazy\"",
        "\"brown quick fox jumps over the lazy dog\"",
        "\"the. quick brown fox jumps over the lazy. dog\"",
        "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
        "\"the. quick brown fox jumps over the lazy. dog\"",
    ]
    "###);
}
@@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best s");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
        "\"summer best\"",
        "\"this is the best meal of summer\"",
        "\"summer x best\"",
        "\"this is the best meal of the summer\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
@@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best su");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
        "\"summer best\"",
        "\"this is the best meal of summer\"",
        "\"summer x best\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);
@@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wint");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
        "\"winter best\"",
        "\"this is the best meal of winter\"",
        "\"winter x best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);

@@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wi");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
        "\"winter best\"",
        "\"this is the best meal of winter\"",
        "\"winter x best\"",
        "\"this is the best meal of the winter\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
    ]
    "###);

@@ -68,8 +68,8 @@ fn test_trap_basic() {
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
                rank: 8,
                max_rank: 8,
            },
        ),
        Typo(
@@ -82,8 +82,8 @@ fn test_trap_basic() {
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
                rank: 8,
                max_rank: 8,
            },
        ),
        Typo(
@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 9,
                max_rank: 25,
                rank: 35,
                max_rank: 57,
            },
        ),
    ],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 9,
                max_rank: 25,
                rank: 35,
                max_rank: 57,
            },
        ),
    ],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 9,
                max_rank: 25,
                rank: 35,
                max_rank: 57,
            },
        ),
    ],

@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 25,
                max_rank: 25,
                rank: 57,
                max_rank: 57,
            },
        ),
    ],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 24,
                max_rank: 25,
                rank: 56,
                max_rank: 57,
            },
        ),
    ],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 9,
                max_rank: 25,
                rank: 35,
                max_rank: 57,
            },
        ),
    ],
@@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 10,
                max_rank: 10,
                rank: 22,
                max_rank: 22,
            },
        ),
    ],
@@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 10,
                max_rank: 10,
                rank: 22,
                max_rank: 22,
            },
        ),
    ],
@@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 10,
                max_rank: 10,
                rank: 22,
                max_rank: 22,
            },
        ),
    ],
@@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 9,
                max_rank: 10,
                rank: 21,
                max_rank: 22,
            },
        ),
    ],
@@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 5,
                max_rank: 10,
                rank: 17,
                max_rank: 22,
            },
        ),
    ],
@@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
        ),
        Proximity(
            Rank {
                rank: 5,
                max_rank: 10,
                rank: 17,
                max_rank: 22,
            },
        ),
    ],
@@ -3,35 +3,59 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
    [
        Proximity(
            Rank {
                rank: 8,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 7,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 3,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -39,31 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -6,32 +6,40 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
                rank: 8,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 3,
                max_rank: 4,
                rank: 7,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
@@ -39,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -47,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -55,15 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -6,32 +6,40 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
                rank: 8,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 3,
                max_rank: 4,
                rank: 7,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
@@ -39,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -47,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -55,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -63,15 +71,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -3,35 +3,59 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
    [
        Proximity(
            Rank {
                rank: 8,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 7,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 6,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 5,
                max_rank: 8,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 4,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 3,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 2,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -39,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -47,31 +71,7 @@ expression: "format!(\"{document_scores:#?}\")"
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
            },
        ),
    ],
    [
        Proximity(
            Rank {
                rank: 1,
                max_rank: 4,
                max_rank: 8,
            },
        ),
    ],
@@ -7,7 +7,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -15,7 +15,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -47,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -55,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
@@ -63,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
||||
@@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 4,
+                max_rank: 8,
             },
         ),
     ],
@@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 4,
+                max_rank: 8,
             },
         ),
     ],
@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 4,
+                max_rank: 8,
             },
         ),
     ],
@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 4,
+                max_rank: 8,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 1,
-                max_rank: 4,
+                rank: 5,
+                max_rank: 8,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 3,
-                max_rank: 4,
+                rank: 7,
+                max_rank: 8,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 7,
+                rank: 8,
+                max_rank: 15,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 50,
+                max_rank: 50,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 22,
+                rank: 50,
+                max_rank: 50,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 22,
+                rank: 49,
+                max_rank: 50,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 22,
+                rank: 49,
+                max_rank: 50,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 22,
+                rank: 48,
+                max_rank: 50,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 17,
-                max_rank: 22,
+                rank: 41,
+                max_rank: 50,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 16,
-                max_rank: 22,
+                rank: 40,
+                max_rank: 50,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 19,
+                rank: 43,
+                max_rank: 43,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 16,
-                max_rank: 16,
+                rank: 36,
+                max_rank: 36,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 13,
-                max_rank: 16,
+                rank: 31,
+                max_rank: 36,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 10,
-                max_rank: 10,
+                rank: 22,
+                max_rank: 22,
             },
         ),
     ],
@@ -166,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 19,
+                rank: 43,
+                max_rank: 43,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 19,
+                rank: 43,
+                max_rank: 43,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 18,
-                max_rank: 19,
+                rank: 42,
+                max_rank: 43,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 18,
-                max_rank: 19,
+                rank: 42,
+                max_rank: 43,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 17,
-                max_rank: 19,
+                rank: 41,
+                max_rank: 43,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 14,
-                max_rank: 19,
+                rank: 34,
+                max_rank: 43,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 13,
-                max_rank: 19,
+                rank: 33,
+                max_rank: 43,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 16,
-                max_rank: 16,
+                rank: 36,
+                max_rank: 36,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 13,
-                max_rank: 13,
+                rank: 29,
+                max_rank: 29,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 10,
-                max_rank: 13,
+                rank: 24,
+                max_rank: 29,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 25,
-                max_rank: 25,
+                rank: 57,
+                max_rank: 57,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 25,
-                max_rank: 25,
+                rank: 57,
+                max_rank: 57,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 25,
+                rank: 56,
+                max_rank: 57,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 25,
+                rank: 56,
+                max_rank: 57,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 23,
-                max_rank: 25,
+                rank: 55,
+                max_rank: 57,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 25,
+                rank: 54,
+                max_rank: 57,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 25,
+                rank: 53,
+                max_rank: 57,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 25,
+                rank: 52,
+                max_rank: 57,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 25,
+                rank: 51,
+                max_rank: 57,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 25,
+                rank: 48,
+                max_rank: 57,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 25,
+                rank: 47,
+                max_rank: 57,
             },
         ),
     ],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 25,
+                max_rank: 57,
             },
         ),
     ],
@@ -178,6 +178,62 @@ expression: "format!(\"{document_scores:#?}\")"
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 50,
                 max_rank: 50,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 43,
                 max_rank: 43,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 38,
                 max_rank: 43,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 5,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 29,
                 max_rank: 29,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 22,
@@ -188,42 +244,14 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Words(
             Words {
-                matching_words: 7,
+                matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 19,
                 max_rank: 19,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 16,
                 max_rank: 19,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 5,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
-                rank: 13,
-                max_rank: 13,
+                rank: 22,
+                max_rank: 22,
             },
         ),
     ],
@@ -236,36 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
                 rank: 10,
                 max_rank: 10,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 10,
                 max_rank: 10,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
-                rank: 10,
-                max_rank: 10,
+                rank: 22,
+                max_rank: 22,
             },
         ),
     ],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 7,
+                rank: 15,
+                max_rank: 15,
             },
         ),
     ],
@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 25,
-                max_rank: 25,
+                rank: 57,
+                max_rank: 57,
             },
         ),
     ],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 25,
+                rank: 56,
+                max_rank: 57,
             },
         ),
     ],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 23,
-                max_rank: 25,
+                rank: 55,
+                max_rank: 57,
             },
         ),
     ],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 25,
+                rank: 54,
+                max_rank: 57,
             },
         ),
     ],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 25,
+                rank: 54,
+                max_rank: 57,
             },
         ),
     ],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 25,
+                rank: 54,
+                max_rank: 57,
             },
         ),
     ],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 25,
+                rank: 53,
+                max_rank: 57,
             },
         ),
     ],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 25,
+                rank: 53,
+                max_rank: 57,
             },
         ),
     ],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 25,
+                rank: 52,
+                max_rank: 57,
             },
         ),
     ],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 18,
-                max_rank: 25,
+                rank: 47,
+                max_rank: 57,
             },
         ),
     ],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 18,
-                max_rank: 25,
+                rank: 45,
+                max_rank: 57,
             },
         ),
     ],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 25,
+                max_rank: 57,
             },
         ),
     ],
@@ -178,6 +178,62 @@ expression: "format!(\"{document_scores:#?}\")"
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 47,
                 max_rank: 50,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 40,
                 max_rank: 43,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 35,
                 max_rank: 43,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 5,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 26,
                 max_rank: 29,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 19,
@@ -188,42 +244,14 @@ expression: "format!(\"{document_scores:#?}\")"
     [
         Words(
             Words {
-                matching_words: 7,
+                matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 16,
                 max_rank: 19,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 7,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 13,
                 max_rank: 19,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 5,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
-                rank: 10,
-                max_rank: 13,
+                rank: 19,
+                max_rank: 22,
             },
         ),
     ],
@@ -236,36 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
                 rank: 7,
                 max_rank: 10,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
                 rank: 7,
                 max_rank: 10,
             },
         ),
     ],
     [
         Words(
             Words {
                 matching_words: 4,
                 max_matching_words: 9,
             },
         ),
         Proximity(
             Rank {
-                rank: 7,
-                max_rank: 10,
+                rank: 19,
+                max_rank: 22,
             },
         ),
     ],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
         ),
         Proximity(
             Rank {
-                rank: 5,
-                max_rank: 7,
+                rank: 13,
+                max_rank: 15,
             },
         ),
     ],
@@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
 [
     [
         Proximity(
             Rank {
-                rank: 25,
-                max_rank: 25,
+                rank: 57,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 25,
-                max_rank: 25,
+                rank: 57,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 25,
+                rank: 56,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 24,
-                max_rank: 25,
+                rank: 56,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 23,
-                max_rank: 25,
+                rank: 55,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 22,
-                max_rank: 25,
+                rank: 54,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 21,
-                max_rank: 25,
+                rank: 53,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 25,
+                rank: 52,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 20,
-                max_rank: 25,
+                rank: 51,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 25,
+                rank: 48,
+                max_rank: 57,
             },
         ),
     ],
     [
         Proximity(
             Rank {
-                rank: 19,
-                max_rank: 25,
+                rank: 47,
+                max_rank: 57,
             },
         ),
     ],
@@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
         Proximity(
             Rank {
                 rank: 1,
-                max_rank: 25,
+                max_rank: 57,
             },
         ),
     ],
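The snapshots above print one `ScoreDetails` list per returned document, and every `Rank { rank, max_rank }` pair is normalized into a relevancy score; that is why switching the proximity criterion from a 4-step to an 8-step scale rewrites so many expected integers without necessarily reordering documents. Below is a minimal sketch of that normalization — the field names come from the snapshots themselves, but the `global_score` helper is a hypothetical illustration, not milli's actual method:

```rust
/// Mirror of the `Rank` pairs printed in the snapshots above.
struct Rank {
    rank: u32,
    max_rank: u32,
}

impl Rank {
    /// Hypothetical normalization: 4/4 and 8/8 both map to 1.0,
    /// so widening the scale changes the printed integers while a
    /// perfect match keeps the same score.
    fn global_score(&self) -> f64 {
        self.rank as f64 / self.max_rank as f64
    }
}

fn main() {
    let old = Rank { rank: 3, max_rank: 4 };
    let new = Rank { rank: 7, max_rank: 8 };
    // 0.75 vs 0.875: the finer scale can now separate documents
    // that the 4-step scale lumped together.
    println!("{} -> {}", old.global_score(), new.global_score());
}
```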
@@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
         ),
         Proximity(
             Rank {
-                rank: 3,
-                max_rank: 4,
+                rank: 7,
+                max_rank: 8,
             },
         ),
         Fid(
@@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
         ),
         Proximity(
             Rank {
-                rank: 2,
-                max_rank: 4,
+                rank: 6,
+                max_rank: 8,
             },
         ),
         Fid(
@@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

     // 7 is better than 6 because of the proximity between "the" and its surrounding terms
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
     "\"the mighty and quick brown fox jumps over the lazy dog\"",
     "\"the brown quick fox jumps over the lazy dog\"",
     "\"the brown quick fox jumps over the really lazy dog\"",
-    "\"this quick brown and scary fox jumps over the lazy dog\"",
     "\"the brown quick fox immediately jumps over the really lazy dog\"",
-    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+    "\"this quick brown and scary fox jumps over the lazy dog\"",
+    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
     "\"the quick brown fox jumps over the lazy\"",
     "\"the quick brown fox jumps over the\"",
@@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

     // 10 is better than 9 because of the proximity between "quick" and "brown"
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
     "\"the great quick brown fox jumps over the lazy dog\"",
     "\"the quick brown fox jumps over the really lazy dog\"",
     "\"the mighty and quick brown fox jumps over the lazy dog\"",
-    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"this quick brown and scary fox jumps over the lazy dog\"",
+    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
     "\"the quick brown fox jumps over the lazy\"",
     "\"the quick brown fox jumps over the\"",
@@ -427,7 +427,7 @@ fn test_words_tms_all() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -439,10 +439,10 @@ fn test_words_tms_all() {
     "\"the mighty and quick brown fox jumps over the lazy dog\"",
     "\"the brown quick fox jumps over the lazy dog\"",
     "\"the brown quick fox jumps over the really lazy dog\"",
-    "\"this quick brown and scary fox jumps over the lazy dog\"",
     "\"the brown quick fox immediately jumps over the really lazy dog\"",
-    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+    "\"this quick brown and scary fox jumps over the lazy dog\"",
+    "\"this quick brown and very scary fox jumps over the lazy dog\"",
     "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
     ]
     "###);
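The test changes above rely on the `insta` snapshot crate: `assert_snapshot!` with an `@"..."` argument compares against an inline snapshot stored in the test source itself, while the single-argument form compares against a `.snap` file on disk — which is exactly what the large snapshot diffs earlier in this changeset are updating. A minimal sketch of the two forms, assuming only the `insta` dev-dependency:

```rust
#[cfg(test)]
mod tests {
    #[test]
    fn snapshot_forms() {
        let documents_ids = vec![9, 21, 14];

        // Inline snapshot: the expected value lives in the test, and
        // `cargo insta review` rewrites the string after the `@`.
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14]");

        // File snapshot: the expected value lives in a `.snap` file
        // next to the test module.
        insta::assert_snapshot!(format!("{documents_ids:#?}"));
    }
}
```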
@@ -108,17 +108,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         self.delete_document(docid);
         Some(docid)
     }

     pub fn execute(self) -> Result<DocumentDeletionResult> {
-        puffin::profile_function!();

         let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
             self.execute_inner()?;

         Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
     }

     pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
-        puffin::profile_function!();

         self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;

         // We retrieve the current documents ids that are in the database.
@@ -478,8 +476,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
     C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
         + for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
 {
-    puffin::profile_function!();
-
     while let Some(result) = iter.next() {
         let (bytes, mut docids) = result?;
         let previous_len = docids.len();
@@ -502,8 +498,6 @@ fn remove_from_word_prefix_docids(
     db: &Database<Str, RoaringBitmapCodec>,
     to_remove: &RoaringBitmap,
 ) -> Result<fst::Set<Vec<u8>>> {
-    puffin::profile_function!();
-
     let mut prefixes_to_delete = fst::SetBuilder::memory();

     // We iterate over the word prefix docids database and remove the deleted documents ids
@@ -534,8 +528,6 @@ fn remove_from_word_docids(
     words_to_keep: &mut BTreeSet<String>,
     words_to_remove: &mut BTreeSet<String>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     // We create an iterator to be able to get the content and delete the word docids.
     // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
     // the LMDB B-Tree two times but only once.
@@ -567,8 +559,6 @@ fn remove_docids_from_field_id_docid_facet_value(
     field_id: FieldId,
     to_remove: &RoaringBitmap,
 ) -> heed::Result<HashSet<Vec<u8>>> {
-    puffin::profile_function!();
-
     let db = match facet_type {
         FacetType::String => {
             index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
@@ -604,8 +594,6 @@ fn remove_docids_from_facet_id_docids<'a, C>(
 where
     C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
 {
-    puffin::profile_function!();
-
     let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
     while let Some(result) = iter.next() {
         let (bytes, mut docids) = result?;
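The hunks above strip `puffin::profile_function!()` scopes out of the deletion pipeline. For context, a minimal sketch of how those scopes are normally driven — the calls below are the `puffin` crate's API as I understand it, so treat the exact names as an assumption:

```rust
fn delete_documents() {
    // Records a profiling scope named after the enclosing function;
    // this is a no-op unless scopes are enabled below.
    puffin::profile_function!();
    // ... actual work ...
}

fn main() {
    puffin::set_scopes_on(true); // enable scope recording globally
    delete_documents();
    // Mark a frame boundary so a viewer (e.g. puffin_viewer) can
    // group the recorded scopes per frame.
    puffin::GlobalProfiler::lock().new_frame();
}
```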
@@ -1,6 +1,5 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@@ -31,7 +30,7 @@ pub struct FacetsUpdateBulk<'i> {
     facet_type: FacetType,
     field_ids: Vec<FieldId>,
     // None if level 0 does not need to be updated
-    new_data: Option<grenad::Reader<BufReader<File>>>,
+    new_data: Option<grenad::Reader<File>>,
 }

 impl<'i> FacetsUpdateBulk<'i> {
@@ -39,7 +38,7 @@ impl<'i> FacetsUpdateBulk<'i> {
         index: &'i Index,
         field_ids: Vec<FieldId>,
         facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
         group_size: u8,
         min_level_size: u8,
     ) -> FacetsUpdateBulk<'i> {
@@ -188,7 +187,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
         &self,
         field_id: FieldId,
         txn: &RoTxn,
-    ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
+    ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
         let mut all_docids = RoaringBitmap::new();
         let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
             for bitmap in bitmaps {
@@ -260,7 +259,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
         field_id: u16,
         level: u8,
         handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
-    ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
+    ) -> Result<Vec<grenad::Reader<File>>> {
         if level == 0 {
             self.read_level_0(rtxn, field_id, handle_group)?;
             // Level 0 is already in the database
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::BufReader;

 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};
@@ -35,14 +34,14 @@ pub struct FacetsUpdateIncremental<'i> {
     index: &'i Index,
     inner: FacetsUpdateIncrementalInner,
     facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
 }

 impl<'i> FacetsUpdateIncremental<'i> {
     pub fn new(
         index: &'i Index,
         facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
         group_size: u8,
         min_level_size: u8,
         max_group_size: u8,
@@ -78,7 +78,6 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

 use std::collections::BTreeSet;
 use std::fs::File;
-use std::io::BufReader;
 use std::iter::FromIterator;

 use charabia::normalizer::{Normalize, NormalizerOption};
@@ -109,17 +108,13 @@ pub struct FacetsUpdate<'i> {
     index: &'i Index,
     database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
     facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
     group_size: u8,
     max_group_size: u8,
     min_level_size: u8,
 }
 impl<'i> FacetsUpdate<'i> {
-    pub fn new(
-        index: &'i Index,
-        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
-    ) -> Self {
+    pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
         let database = match facet_type {
             FacetType::String => index
                 .facet_id_string_docids
@@ -1,4 +1,4 @@
-use std::io::{BufWriter, Read, Seek};
+use std::io::{Read, Seek};
 use std::result::Result as StdResult;
 use std::{fmt, iter};

@@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(

     let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();

-    let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
+    let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
     let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];

     // The primary key *field id* that has already been set for this index or the one
@@ -1,7 +1,6 @@
 use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::BufReader;
 use std::{io, mem, str};

 use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
@@ -32,7 +31,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
     allowed_separators: Option<&[&str]>,
     dictionary: Option<&[&str]>,
     max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
     puffin::profile_function!();

     let max_positions_per_attributes = max_positions_per_attributes
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::{BytesDecode, BytesEncode};

@@ -19,7 +19,7 @@ use crate::Result;
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
     docid_fid_facet_number: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::BytesEncode;

@@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
     docid_fid_facet_string: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,7 +1,7 @@
 use std::collections::{BTreeMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 use std::mem::size_of;

 use heed::zerocopy::AsBytes;
@@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET

 /// The extracted facet values stored in grenad files by type.
 pub struct ExtractedFacetValues {
-    pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
-    pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
+    pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
+    pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
+    pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
 }

 /// Extracts the facet values of each faceted field of each document.
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use grenad::Sorter;

@@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
 pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use concat_arrays::concat_arrays;
 use serde_json::Value;
@@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
     indexer: GrenadParameters,
     primary_key_id: FieldId,
     (lat_fid, lng_fid): (FieldId, FieldId),
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let mut writer = create_writer(
@@ -1,6 +1,6 @@
 use std::convert::TryFrom;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::cast_slice;
 use serde_json::{from_slice, Value};
@@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
     indexer: GrenadParameters,
     primary_key_id: FieldId,
     vectors_fid: FieldId,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let mut writer = create_writer(
@@ -1,6 +1,6 @@
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 use std::iter::FromIterator;

 use roaring::RoaringBitmap;
@@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
     exact_attributes: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
     create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
 pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,7 +1,6 @@
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};
 use std::fs::File;
-use std::io::BufReader;
 use std::{cmp, io, mem, str, vec};

 use super::helpers::{
@@ -21,7 +20,7 @@ use crate::{DocumentId, Result};
 pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
     create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
 pub fn extract_word_position_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -12,7 +12,6 @@ mod extract_word_position_docids;

 use std::collections::HashSet;
 use std::fs::File;
-use std::io::BufReader;

 use crossbeam_channel::Sender;
 use log::debug;
@@ -40,8 +39,8 @@ use crate::{FieldId, Result};
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn data_from_obkv_documents(
-    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
-    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
+    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
+    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: Option<HashSet<FieldId>>,
@@ -153,17 +152,17 @@ pub(crate) fn data_from_obkv_documents(
         });
     }

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_word_positions_chunks.clone(),
-        indexer,
-        lmdb_writer_sx.clone(),
-        extract_word_pair_proximity_docids,
-        merge_cbo_roaring_bitmaps,
-        TypedChunk::WordPairProximityDocids,
-        "word-pair-proximity-docids",
-    );
+    // spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
+    //     docid_word_positions_chunks.clone(),
+    //     indexer,
+    //     lmdb_writer_sx.clone(),
+    //     extract_word_pair_proximity_docids,
+    //     merge_cbo_roaring_bitmaps,
+    //     TypedChunk::WordPairProximityDocids,
+    //     "word-pair-proximity-docids",
+    // );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),
@@ -173,11 +172,7 @@ pub(crate) fn data_from_obkv_documents(
         "field-id-wordcount-docids",
     );

-    spawn_extraction_task::<
-        _,
-        _,
-        Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
-    >(
+    spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),
@@ -190,7 +185,7 @@ pub(crate) fn data_from_obkv_documents(
         "word-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),
@@ -199,7 +194,7 @@ pub(crate) fn data_from_obkv_documents(
         TypedChunk::WordPositionDocids,
         "word-position-docids",
     );
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks,
         indexer,
         lmdb_writer_sx.clone(),
@@ -209,7 +204,7 @@ pub(crate) fn data_from_obkv_documents(
         "word-fid-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_fid_facet_strings_chunks,
         indexer,
         lmdb_writer_sx.clone(),
@@ -219,7 +214,7 @@ pub(crate) fn data_from_obkv_documents(
         "field-id-facet-string-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_fid_facet_numbers_chunks,
         indexer,
         lmdb_writer_sx,
@@ -274,7 +269,7 @@ fn spawn_extraction_task<FE, FS, M>(
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
-    original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
+    original_documents_chunk: Result<grenad::Reader<File>>,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     vectors_field_id: Option<FieldId>,
@@ -316,7 +311,7 @@ fn send_original_documents_data(
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
 fn send_and_extract_flattened_documents_data(
-    flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
+    flattened_documents_chunk: Result<grenad::Reader<File>>,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: &Option<HashSet<FieldId>>,
@@ -333,10 +328,7 @@ fn send_and_extract_flattened_documents_data(
     grenad::Reader<CursorClonableMmap>,
     (
         grenad::Reader<CursorClonableMmap>,
-        (
-            grenad::Reader<BufReader<File>>,
-            (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
-        ),
+        (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
     ),
 ),
 )> {
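`data_from_obkv_documents` above fans extracted chunks out over a `crossbeam_channel::Sender`, one `spawn_extraction_task` per chunk kind, with a single LMDB writer draining the receiving end; the prototype comments out only the word-pair-proximity task. A reduced sketch of that producer/consumer shape — the `Chunk` enum and thread bodies here are illustrative stand-ins, not milli's types:

```rust
use crossbeam_channel::unbounded;
use std::thread;

// Illustrative stand-in for milli's `TypedChunk` enum.
enum Chunk {
    WordDocids(Vec<u32>),
    WordPositionDocids(Vec<u32>),
}

fn main() {
    let (tx, rx) = unbounded::<Chunk>();

    // Each extraction task owns a sender clone, mirroring the
    // repeated `lmdb_writer_sx.clone()` calls in the diff above.
    let tx2 = tx.clone();
    let a = thread::spawn(move || tx.send(Chunk::WordDocids(vec![1, 2])).unwrap());
    let b = thread::spawn(move || tx2.send(Chunk::WordPositionDocids(vec![3])).unwrap());
    a.join().unwrap();
    b.join().unwrap();

    // The single consumer drains the channel; iteration ends once
    // every sender has been dropped.
    for chunk in rx {
        match chunk {
            Chunk::WordDocids(v) => println!("word docids: {v:?}"),
            Chunk::WordPositionDocids(v) => println!("word positions: {v:?}"),
        }
    }
}
```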
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::{self, BufReader, BufWriter, Seek};
+use std::io::{self, Seek};
 use std::time::Instant;

 use grenad::{CompressionType, Sorter};
@@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
     typ: grenad::CompressionType,
     level: Option<u32>,
     file: R,
-) -> grenad::Writer<BufWriter<R>> {
+) -> grenad::Writer<R> {
     let mut builder = grenad::Writer::builder();
     builder.compression_type(typ);
     if let Some(level) = level {
         builder.compression_level(level);
     }
-    builder.build(BufWriter::new(file))
+    builder.build(file)
 }

 pub fn create_sorter(
@@ -53,7 +53,7 @@ pub fn create_sorter(
 pub fn sorter_into_reader(
     sorter: grenad::Sorter<MergeFn>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     let mut writer = create_writer(
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
@@ -64,18 +64,16 @@ pub fn sorter_into_reader(
     writer_into_reader(writer)
 }

-pub fn writer_into_reader(
-    writer: grenad::Writer<BufWriter<File>>,
-) -> Result<grenad::Reader<BufReader<File>>> {
-    let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
+    let mut file = writer.into_inner()?;
     file.rewind()?;
-    grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
+    grenad::Reader::new(file).map_err(Into::into)
 }

 pub unsafe fn as_cloneable_grenad(
-    reader: &grenad::Reader<BufReader<File>>,
+    reader: &grenad::Reader<File>,
 ) -> Result<grenad::Reader<CursorClonableMmap>> {
-    let file = reader.get_ref().get_ref();
+    let file = reader.get_ref();
     let mmap = memmap2::Mmap::map(file)?;
     let cursor = io::Cursor::new(ClonableMmap::from(mmap));
     let reader = grenad::Reader::new(cursor)?;
@@ -91,8 +89,8 @@ where
     fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
 }

-impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
-    type Output = grenad::Reader<BufReader<File>>;
+impl MergeableReader for Vec<grenad::Reader<File>> {
+    type Output = grenad::Reader<File>;

     fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
         let mut merger = MergerBuilder::new(merge_fn);
@@ -101,8 +99,8 @@ impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
     }
 }

-impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-    type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
+impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
+    type Output = (grenad::Reader<File>, grenad::Reader<File>);

     fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
         let mut m1 = MergerBuilder::new(merge_fn);
@@ -127,7 +125,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
         Ok(())
     }

-    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
+    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
         let merger = self.0.build();
         let mut writer = create_writer(
             params.chunk_compression_type,
@@ -178,7 +176,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
     reader: grenad::Reader<R>,
     indexer: GrenadParameters,
     documents_chunk_size: usize,
-) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
+) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
     let mut continue_reading = true;
     let mut cursor = reader.into_cursor()?;
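The `writer_into_reader` hunk above is the crux of the revert: with buffering, a `grenad::Writer<BufWriter<File>>` has to be unwrapped twice, and `BufWriter::into_inner` returns its own error type that must be mapped back to an `io::Error`. A standalone sketch of just that std pattern (no grenad involved; the temp path is illustrative):

```rust
use std::fs::File;
use std::io::{BufWriter, Seek, Write};

fn main() -> std::io::Result<()> {
    let file = File::create("/tmp/example.bin")?;
    let mut writer = BufWriter::new(file);
    writer.write_all(b"hello")?;

    // `into_inner` flushes and hands back the File; on failure it
    // returns an IntoInnerError carrying the buffered writer, so the
    // diff's `.map_err(|err| err.into_error())` extracts the io::Error.
    let mut file = writer.into_inner().map_err(|err| err.into_error())?;
    file.rewind()?;
    Ok(())
}
```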
@@ -659,10 +659,8 @@ impl<'a, 'i> Transform<'a, 'i> {
             new_documents_ids: self.new_documents_ids,
             replaced_documents_ids: self.replaced_documents_ids,
             documents_count: self.documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
         })
     }

@@ -781,10 +779,8 @@ impl<'a, 'i> Transform<'a, 'i> {
             new_documents_ids: documents_ids,
             replaced_documents_ids: RoaringBitmap::default(),
             documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
         };

     let new_facets = output.compute_real_facets(wtxn, self.index)?;
@@ -2,7 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
@@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
     FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
     FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
     Documents(grenad::Reader<CursorClonableMmap>),
-    FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
+    FieldIdWordcountDocids(grenad::Reader<File>),
     NewDocumentsIds(RoaringBitmap),
     WordDocids {
-        word_docids_reader: grenad::Reader<BufReader<File>>,
-        exact_word_docids_reader: grenad::Reader<BufReader<File>>,
+        word_docids_reader: grenad::Reader<File>,
+        exact_word_docids_reader: grenad::Reader<File>,
     },
-    WordPositionDocids(grenad::Reader<BufReader<File>>),
-    WordFidDocids(grenad::Reader<BufReader<File>>),
-    WordPairProximityDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
-    GeoPoints(grenad::Reader<BufReader<File>>),
-    VectorPoints(grenad::Reader<BufReader<File>>),
+    WordPositionDocids(grenad::Reader<File>),
+    WordFidDocids(grenad::Reader<File>),
+    WordPairProximityDocids(grenad::Reader<File>),
+    FieldIdFacetStringDocids(grenad::Reader<File>),
+    FieldIdFacetNumberDocids(grenad::Reader<File>),
+    FieldIdFacetExistsDocids(grenad::Reader<File>),
+    FieldIdFacetIsNullDocids(grenad::Reader<File>),
+    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
+    GeoPoints(grenad::Reader<File>),
+    VectorPoints(grenad::Reader<File>),
     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
-use std::io::{BufReader, BufWriter};
+use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@@ -119,9 +119,9 @@ pub fn insert_into_database(
 pub fn write_into_lmdb_database_without_merging(
     wtxn: &mut heed::RwTxn,
     database: heed::PolyDatabase,
-    writer: grenad::Writer<BufWriter<std::fs::File>>,
+    writer: grenad::Writer<std::fs::File>,
 ) -> Result<()> {
-    let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+    let file = writer.into_inner()?;
     let reader = grenad::Reader::new(BufReader::new(file))?;
     if database.is_empty(wtxn)? {
         let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
@@ -20,4 +20,7 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [100, ]
 3 rings a [101, ]
 3 the a [101, ]
+4 at b [100, ]
+4 at be [100, ]
+4 bell a [101, ]

@@ -30,4 +30,10 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 bell 5 [101, ]
 3 rings am [101, ]
 3 the at [101, ]
+4 an house [100, ]
+4 at beautiful [100, ]
+4 bell am [101, ]
+4 the 5 [101, ]
+5 at house [100, ]
+5 the am [101, ]

@@ -28,4 +28,8 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 rings a [101, ]
 3 rings am [101, ]
 3 the a [101, ]
+4 at b [100, ]
+4 at be [100, ]
+4 bell a [101, ]
+4 bell am [101, ]

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2 bell a [51, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [50, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2 bell a [51, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [50, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [50, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]
Some files were not shown because too many files have changed in this diff.