mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 12:20:48 +00:00
Compare commits
80 Commits
hackaton-r
...
simplify-r
Author | SHA1 | Date | |
---|---|---|---|
0a34f70566 | |||
75d8d4f3a8 | |||
62ea81bef6 | |||
f28f09ae2f | |||
eae9eab181 | |||
cf8dad1ca0 | |||
dd619913da | |||
9b55ff16e9 | |||
e761db582f | |||
d8c649b3cd | |||
5e0485d8dd | |||
27eec21415 | |||
62cc97ba70 | |||
fed59cc1d5 | |||
2b3adef796 | |||
956cfc5487 | |||
12fc878640 | |||
0a2e8b92a9 | |||
c7a3f80de6 | |||
029d4de043 | |||
549f1bcccf | |||
689ec7c7ad | |||
3655d4bdca | |||
055ca3935b | |||
1b8871a585 | |||
bf8fac6676 | |||
f2a9e1ebbb | |||
c45c6cf54c | |||
513e61e9a3 | |||
90a626bf80 | |||
0d4acf2daa | |||
58db8d85ec | |||
62dfd09dc6 | |||
656dadabea | |||
c5f7893fbb | |||
8cf2ccf168 | |||
0913373a5e | |||
1a7f1282af | |||
bc747aac3a | |||
be92376ab3 | |||
cf7e355735 | |||
5f09d89ad1 | |||
6ecb26a3f8 | |||
76c6f554d6 | |||
f343ef5f2f | |||
96982a768a | |||
fca78fbc46 | |||
67a678cfb6 | |||
d1331d8abf | |||
19ba129165 | |||
d4da06ff47 | |||
3e0471edae | |||
432df03c4c | |||
11958016dd | |||
63c250a04d | |||
06d8cd5b72 | |||
c0f2724c2d | |||
d772073dfa | |||
8fe8ddea79 | |||
8a95bf28e5 | |||
c0fd3dffb8 | |||
c42fd5375f | |||
b418c3a756 | |||
1cde455758 | |||
ca19bae72f | |||
705878ff59 | |||
92c280d1c8 | |||
181e7a1e53 | |||
2e5abb4d2c | |||
44aaf5d9e3 | |||
ff0ababf65 | |||
c5336af1c5 | |||
1567758a56 | |||
37953afe1a | |||
43989fe2e4 | |||
de3f992ae4 | |||
c668a29ed5 | |||
98f0618065 | |||
b10eeb0e41 | |||
4a8515e9fc |
8
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
8
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@ -7,19 +7,17 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
|
||||
Related product team resources: [PRD]() (_internal only_)
|
||||
Related product discussion:
|
||||
Related spec: WIP
|
||||
|
||||
## Motivation
|
||||
|
||||
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
|
||||
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
|
||||
|
||||
## Usage
|
||||
|
||||
<!---Write a quick description of the usage if the usage has already been defined-->
|
||||
|
||||
Refer to the final spec to know the details and the final decisions about the usage.
|
||||
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
|
||||
|
||||
## TODO
|
||||
|
||||
|
2
.github/workflows/benchmarks-manual.yml
vendored
2
.github/workflows/benchmarks-manual.yml
vendored
@ -74,4 +74,4 @@ jobs:
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
||||
echo " - Run the following command: ./benchmaks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
||||
|
97
.github/workflows/benchmarks-pr.yml
vendored
Normal file
97
.github/workflows/benchmarks-pr.yml
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
name: Benchmarks (PR)
|
||||
on: issue_comment
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
run-benchmarks-on-comment:
|
||||
if: startsWith(github.event.comment.body, '/benchmark')
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Check for Command
|
||||
id: command
|
||||
uses: xt0rted/slash-command-action@v2
|
||||
with:
|
||||
command: benchmark
|
||||
reaction-type: "eyes"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
if: success()
|
||||
with:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export cripcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Compute the diff of the benchmarks and send a message on the GitHub PR
|
||||
- name: Compute and send a message in the PR
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
run: |
|
||||
set -x
|
||||
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
|
||||
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
|
||||
echo 'Here are your benchmarks diff 👊' >> body.txt
|
||||
echo '```' >> body.txt
|
||||
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
|
||||
echo '```' >> body.txt
|
||||
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt
|
8
.github/workflows/publish-docker-images.yml
vendored
8
.github/workflows/publish-docker-images.yml
vendored
@ -57,10 +57,10 @@ jobs:
|
||||
echo "date=$commit_date" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
@ -70,7 +70,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v4
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: getmeili/meilisearch
|
||||
# Prevent `latest` to be updated for each new tag pushed.
|
||||
@ -83,7 +83,7 @@ jobs:
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
2
.github/workflows/test-suite.yml
vendored
2
.github/workflows/test-suite.yml
vendored
@ -183,7 +183,7 @@ jobs:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly
|
||||
toolchain: 1.71.1
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
|
@ -1,5 +1,3 @@
|
||||
unstable_features = true
|
||||
|
||||
use_small_heuristics = "max"
|
||||
imports_granularity = "Module"
|
||||
group_imports = "StdExternalCrate"
|
||||
|
51
Cargo.lock
generated
51
Cargo.lock
generated
@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@ -1206,7 +1206,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -1417,7 +1417,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@ -1439,7 +1439,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@ -1459,7 +1459,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -1577,7 +1577,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@ -1891,7 +1891,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -2088,7 +2088,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@ -2509,7 +2509,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@ -2564,7 +2564,6 @@ dependencies = [
|
||||
"platform-dirs",
|
||||
"prometheus",
|
||||
"puffin",
|
||||
"puffin_http",
|
||||
"rand",
|
||||
"rayon",
|
||||
"regex",
|
||||
@ -2600,7 +2599,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"enum-iterator",
|
||||
@ -2619,7 +2618,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@ -2673,7 +2672,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"bimap",
|
||||
@ -2995,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@ -3193,7 +3192,7 @@ dependencies = [
|
||||
"byteorder",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"rustix 0.36.15",
|
||||
"rustix 0.36.16",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3236,18 +3235,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "puffin_http"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"crossbeam-channel",
|
||||
"log",
|
||||
"puffin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.32"
|
||||
@ -3478,9 +3465,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.36.15"
|
||||
version = "0.36.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941"
|
||||
checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"errno",
|
||||
@ -4443,9 +4430,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "webpki"
|
||||
version = "0.22.1"
|
||||
version = "0.22.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e"
|
||||
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"untrusted",
|
||||
|
@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
@ -1,14 +1,14 @@
|
||||
# Profiling Meilisearch
|
||||
|
||||
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
|
||||
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
|
||||
|
||||

|
||||
|
||||
## Profiling the Indexing Process
|
||||
|
||||
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||
|
||||
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
|
||||
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
|
||||
|
||||
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
|
||||
|
||||
|
@ -25,6 +25,12 @@
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
|
||||
---
|
||||
|
||||
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
|
||||
|
||||
---
|
||||
|
||||
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
|
@ -526,12 +526,12 @@ pub(crate) mod test {
|
||||
assert!(indexes.is_empty());
|
||||
|
||||
// products
|
||||
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(products.metadata(), @r###"
|
||||
{
|
||||
"uid": "products",
|
||||
"primaryKey": "sku",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:22.688964637Z",
|
||||
"updatedAt": "2022-10-09T20:27:23.951017769Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -541,12 +541,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
||||
|
||||
// movies
|
||||
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(movies.metadata(), @r###"
|
||||
{
|
||||
"uid": "movies",
|
||||
"primaryKey": "id",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:22.197788495Z",
|
||||
"updatedAt": "2022-10-09T20:28:01.93111053Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -571,12 +571,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
|
||||
|
||||
// spells
|
||||
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(spells.metadata(), @r###"
|
||||
{
|
||||
"uid": "dnd_spells",
|
||||
"primaryKey": "index",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:24.242683494Z",
|
||||
"updatedAt": "2022-10-09T20:27:24.312809641Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -617,12 +617,12 @@ pub(crate) mod test {
|
||||
assert!(indexes.is_empty());
|
||||
|
||||
// products
|
||||
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(products.metadata(), @r###"
|
||||
{
|
||||
"uid": "products",
|
||||
"primaryKey": "sku",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:56.595257Z",
|
||||
"updatedAt": "2023-01-30T16:25:58.70348Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -632,12 +632,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
||||
|
||||
// movies
|
||||
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(movies.metadata(), @r###"
|
||||
{
|
||||
"uid": "movies",
|
||||
"primaryKey": "id",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:56.192178Z",
|
||||
"updatedAt": "2023-01-30T16:25:56.455714Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -647,12 +647,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
|
||||
|
||||
// spells
|
||||
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(spells.metadata(), @r###"
|
||||
{
|
||||
"uid": "dnd_spells",
|
||||
"primaryKey": "index",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:58.876405Z",
|
||||
"updatedAt": "2023-01-30T16:25:59.079906Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
|
||||
pub type Unchecked = settings::Unchecked;
|
||||
|
||||
pub type Task = updates::UpdateEntry;
|
||||
pub type Kind = updates::UpdateMeta;
|
||||
|
||||
// everything related to the errors
|
||||
pub type ResponseError = errors::ResponseError;
|
||||
@ -107,8 +108,11 @@ impl V2Reader {
|
||||
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
|
||||
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
|
||||
V2IndexReader::new(
|
||||
index.uid.clone(),
|
||||
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
|
||||
index,
|
||||
BufReader::new(
|
||||
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
|
||||
),
|
||||
)
|
||||
}))
|
||||
}
|
||||
@ -143,16 +147,41 @@ pub struct V2IndexReader {
|
||||
}
|
||||
|
||||
impl V2IndexReader {
|
||||
pub fn new(name: String, path: &Path) -> Result<Self> {
|
||||
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
|
||||
let meta = File::open(path.join("meta.json"))?;
|
||||
let meta: DumpMeta = serde_json::from_reader(meta)?;
|
||||
|
||||
let mut created_at = None;
|
||||
let mut updated_at = None;
|
||||
|
||||
for line in tasks.lines() {
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let new_created_at = match task.update.meta() {
|
||||
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
|
||||
_ => None,
|
||||
};
|
||||
let new_updated_at = task.update.finished_at();
|
||||
|
||||
if created_at.is_none() || created_at > new_created_at {
|
||||
created_at = new_created_at;
|
||||
}
|
||||
|
||||
if updated_at.is_none() || updated_at < new_updated_at {
|
||||
updated_at = new_updated_at;
|
||||
}
|
||||
}
|
||||
|
||||
let current_time = OffsetDateTime::now_utc();
|
||||
|
||||
let metadata = IndexMetadata {
|
||||
uid: name,
|
||||
uid: index_uuid.uid.clone(),
|
||||
primary_key: meta.primary_key,
|
||||
// FIXME: Iterate over the whole task queue to find the creation and last update date.
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
updated_at: OffsetDateTime::now_utc(),
|
||||
created_at: created_at.unwrap_or(current_time),
|
||||
updated_at: updated_at.unwrap_or(current_time),
|
||||
};
|
||||
|
||||
let ret = V2IndexReader {
|
||||
@ -248,12 +277,12 @@ pub(crate) mod test {
|
||||
assert!(indexes.is_empty());
|
||||
|
||||
// products
|
||||
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(products.metadata(), @r###"
|
||||
{
|
||||
"uid": "products",
|
||||
"primaryKey": "sku",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:22.688964637Z",
|
||||
"updatedAt": "2022-10-09T20:27:23.951017769Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -263,12 +292,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
||||
|
||||
// movies
|
||||
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(movies.metadata(), @r###"
|
||||
{
|
||||
"uid": "movies",
|
||||
"primaryKey": "id",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:22.197788495Z",
|
||||
"updatedAt": "2022-10-09T20:28:01.93111053Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -293,12 +322,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
|
||||
|
||||
// spells
|
||||
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(spells.metadata(), @r###"
|
||||
{
|
||||
"uid": "dnd_spells",
|
||||
"primaryKey": "index",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2022-10-09T20:27:24.242683494Z",
|
||||
"updatedAt": "2022-10-09T20:27:24.312809641Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -340,12 +369,12 @@ pub(crate) mod test {
|
||||
assert!(indexes.is_empty());
|
||||
|
||||
// products
|
||||
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(products.metadata(), @r###"
|
||||
{
|
||||
"uid": "products",
|
||||
"primaryKey": "sku",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:56.595257Z",
|
||||
"updatedAt": "2023-01-30T16:25:58.70348Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -355,12 +384,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
||||
|
||||
// movies
|
||||
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(movies.metadata(), @r###"
|
||||
{
|
||||
"uid": "movies",
|
||||
"primaryKey": "id",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:56.192178Z",
|
||||
"updatedAt": "2023-01-30T16:25:56.455714Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -370,12 +399,12 @@ pub(crate) mod test {
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
|
||||
|
||||
// spells
|
||||
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
||||
insta::assert_json_snapshot!(spells.metadata(), @r###"
|
||||
{
|
||||
"uid": "dnd_spells",
|
||||
"primaryKey": "index",
|
||||
"createdAt": "[now]",
|
||||
"updatedAt": "[now]"
|
||||
"createdAt": "2023-01-30T16:25:58.876405Z",
|
||||
"updatedAt": "2023-01-30T16:25:59.079906Z"
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -227,4 +227,14 @@ impl UpdateStatus {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finished_at(&self) -> Option<OffsetDateTime> {
|
||||
match self {
|
||||
UpdateStatus::Processing(_) => None,
|
||||
UpdateStatus::Enqueued(_) => None,
|
||||
UpdateStatus::Processed(u) => Some(u.processed_at),
|
||||
UpdateStatus::Aborted(_) => None,
|
||||
UpdateStatus::Failed(u) => Some(u.failed_at),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ one indexing operation.
|
||||
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::fs::{self, File};
|
||||
use std::io::BufWriter;
|
||||
|
||||
@ -199,6 +200,29 @@ impl Batch {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Batch {
|
||||
/// A text used when we debug the profiling reports.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let index_uid = self.index_uid();
|
||||
let tasks = self.ids();
|
||||
match self {
|
||||
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
|
||||
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
|
||||
Batch::Dump(_) => f.write_str("Dump")?,
|
||||
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
|
||||
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
};
|
||||
match index_uid {
|
||||
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
|
||||
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
pub fn index_uid(&self) -> &str {
|
||||
match self {
|
||||
@ -213,6 +237,30 @@ impl IndexOperation {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for IndexOperation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::DocumentOperation")
|
||||
}
|
||||
IndexOperation::DocumentDeletion { .. } => {
|
||||
f.write_str("IndexOperation::DocumentDeletion")
|
||||
}
|
||||
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
||||
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
||||
}
|
||||
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
|
||||
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
|
||||
IndexOperation::DocumentClearAndSetting { .. } => {
|
||||
f.write_str("IndexOperation::DocumentClearAndSetting")
|
||||
}
|
||||
IndexOperation::SettingsAndDocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::SettingsAndDocumentOperation")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
@ -581,7 +629,7 @@ impl IndexScheduler {
|
||||
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
puffin::profile_function!(format!("{:?}", batch));
|
||||
puffin::profile_function!(batch.to_string());
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
||||
@ -848,7 +896,7 @@ impl IndexScheduler {
|
||||
})?;
|
||||
|
||||
// 4. Dump experimental feature settings
|
||||
let features = self.features()?.runtime_features();
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
|
@ -1,6 +1,8 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
instance: InstanceTogglableFeatures,
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct RoFeatures {
|
||||
runtime: RuntimeTogglableFeatures,
|
||||
instance: InstanceTogglableFeatures,
|
||||
}
|
||||
|
||||
impl RoFeatures {
|
||||
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
|
||||
let runtime = data.runtime_features(txn)?;
|
||||
Ok(Self { runtime, instance: data.instance })
|
||||
fn new(data: &FeatureData) -> Self {
|
||||
let runtime = data.runtime_features();
|
||||
Self { runtime }
|
||||
}
|
||||
|
||||
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
|
||||
@ -43,13 +44,13 @@ impl RoFeatures {
|
||||
}
|
||||
|
||||
pub fn check_metrics(&self) -> Result<()> {
|
||||
if self.instance.metrics {
|
||||
if self.runtime.metrics {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Getting metrics",
|
||||
feature: "metrics",
|
||||
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/625",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
@ -67,15 +68,36 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_puffin(&self) -> Result<()> {
|
||||
if self.runtime.export_puffin_reports {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Outputting Puffin reports to disk",
|
||||
feature: "export puffin reports",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/693",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Self { runtime: runtime_features, instance: instance_features })
|
||||
let txn = env.read_txn()?;
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: instance_features.metrics || persisted_features.metrics,
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
Ok(Self { persisted: runtime_features_db, runtime })
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(
|
||||
@ -83,16 +105,25 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
|
||||
// 2. there's a panic while the thread is held -> it is only used for an assignment here.
|
||||
let mut toggled_features = self.runtime.write().unwrap();
|
||||
*toggled_features = features;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
|
||||
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
|
||||
fn runtime_features(&self) -> RuntimeTogglableFeatures {
|
||||
// sound to unwrap, the lock will only fail if:
|
||||
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
|
||||
// 2. there's a panic while the thread is held -> it is only used for copying the data here
|
||||
*self.runtime.read().unwrap()
|
||||
}
|
||||
|
||||
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
|
||||
RoFeatures::new(txn, self)
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
}
|
||||
|
@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
puffin_frame: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
|
@ -33,6 +33,7 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use puffin::FrameView;
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
@ -314,6 +316,9 @@ pub struct IndexScheduler {
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
|
||||
/// A frame to output the indexation profiling files to disk.
|
||||
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
@ -364,6 +369,7 @@ impl IndexScheduler {
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
puffin_frame: self.puffin_frame.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
@ -457,6 +463,7 @@ impl IndexScheduler {
|
||||
env,
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
@ -572,17 +579,46 @@ impl IndexScheduler {
|
||||
run.wake_up.wait();
|
||||
|
||||
loop {
|
||||
let puffin_enabled = run.features().check_puffin().is_ok();
|
||||
puffin::set_scopes_on(puffin_enabled);
|
||||
puffin::GlobalProfiler::lock().new_frame();
|
||||
|
||||
match run.tick() {
|
||||
Ok(TickOutcome::TickAgain(_)) => (),
|
||||
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
log::error!("{e}");
|
||||
// Wait one second when an irrecoverable error occurs.
|
||||
if !e.is_recoverable() {
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Let's write the previous frame to disk but only if
|
||||
// the user wanted to profile with puffin.
|
||||
if puffin_enabled {
|
||||
let mut frame_view = run.puffin_frame.lock();
|
||||
if !frame_view.is_empty() {
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let mut file = match File::create(format!("{}.puffin", now)) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
log::error!("{e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(e) = frame_view.save_to_writer(&mut file) {
|
||||
log::error!("{e}");
|
||||
}
|
||||
if let Err(e) = file.sync_all() {
|
||||
log::error!("{e}");
|
||||
}
|
||||
// We erase this frame view as it is no more useful. We want to
|
||||
// measure the new frames now that we exported the previous ones.
|
||||
*frame_view = FrameView::default();
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
@ -1062,8 +1098,6 @@ impl IndexScheduler {
|
||||
self.breakpoint(Breakpoint::Start);
|
||||
}
|
||||
|
||||
puffin::GlobalProfiler::lock().new_frame();
|
||||
|
||||
self.cleanup_task_queue()?;
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
@ -1259,9 +1293,8 @@ impl IndexScheduler {
|
||||
Ok(IndexStats { is_indexing, inner_stats: index_stats })
|
||||
}
|
||||
|
||||
pub fn features(&self) -> Result<RoFeatures> {
|
||||
let rtxn = self.read_txn()?;
|
||||
self.features.features(rtxn)
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
self.features.features()
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
|
@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
|
||||
pub struct RuntimeTogglableFeatures {
|
||||
pub score_details: bool,
|
||||
pub vector_store: bool,
|
||||
pub metrics: bool,
|
||||
pub export_puffin_reports: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
|
@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
puffin = "0.16.0"
|
||||
puffin_http = { version = "0.13.0", optional = true }
|
||||
puffin = { version = "0.16.0", features = ["serialization"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true }
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
analytics = ["segment"]
|
||||
profile-with-puffin = ["dep:puffin_http"]
|
||||
mini-dashboard = [
|
||||
"actix-web-static-files",
|
||||
"static-files",
|
||||
|
@ -114,10 +114,7 @@ pub fn create_app(
|
||||
.configure(routes::configure)
|
||||
.configure(|s| dashboard(s, enable_dashboard));
|
||||
|
||||
let app = app.wrap(actix_web::middleware::Condition::new(
|
||||
opt.experimental_enable_metrics,
|
||||
middleware::RouteMetrics,
|
||||
));
|
||||
let app = app.wrap(middleware::RouteMetrics);
|
||||
app.wrap(
|
||||
Cors::default()
|
||||
.send_wildcard()
|
||||
|
@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
#[cfg(feature = "profile-with-puffin")]
|
||||
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
|
||||
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
|
@ -3,8 +3,10 @@
|
||||
use std::future::{ready, Ready};
|
||||
|
||||
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
|
||||
use actix_web::web::Data;
|
||||
use actix_web::Error;
|
||||
use futures_util::future::LocalBoxFuture;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use prometheus::HistogramTimer;
|
||||
|
||||
pub struct RouteMetrics;
|
||||
@ -47,19 +49,27 @@ where
|
||||
|
||||
fn call(&self, req: ServiceRequest) -> Self::Future {
|
||||
let mut histogram_timer: Option<HistogramTimer> = None;
|
||||
let request_path = req.path();
|
||||
let is_registered_resource = req.resource_map().has_resource(request_path);
|
||||
if is_registered_resource {
|
||||
let request_method = req.method().to_string();
|
||||
histogram_timer = Some(
|
||||
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
|
||||
|
||||
// calling unwrap here is safe because index scheduler is added to app data while creating actix app.
|
||||
// also, the tests will fail if this is not present.
|
||||
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
|
||||
let features = index_scheduler.features();
|
||||
|
||||
if features.check_metrics().is_ok() {
|
||||
let request_path = req.path();
|
||||
let is_registered_resource = req.resource_map().has_resource(request_path);
|
||||
if is_registered_resource {
|
||||
let request_method = req.method().to_string();
|
||||
histogram_timer = Some(
|
||||
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
|
||||
.with_label_values(&[&request_method, request_path])
|
||||
.start_timer(),
|
||||
);
|
||||
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
|
||||
.with_label_values(&[&request_method, request_path])
|
||||
.start_timer(),
|
||||
);
|
||||
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
|
||||
.with_label_values(&[&request_method, request_path])
|
||||
.inc();
|
||||
}
|
||||
.inc();
|
||||
}
|
||||
};
|
||||
|
||||
let fut = self.service.call(req);
|
||||
|
||||
|
@ -29,12 +29,12 @@ async fn get_features(
|
||||
>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let features = index_scheduler.features()?;
|
||||
) -> HttpResponse {
|
||||
let features = index_scheduler.features();
|
||||
|
||||
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
|
||||
debug!("returns: {:?}", features.runtime_features());
|
||||
Ok(HttpResponse::Ok().json(features.runtime_features()))
|
||||
HttpResponse::Ok().json(features.runtime_features())
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub score_details: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub vector_store: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub metrics: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub export_puffin_reports: Option<bool>,
|
||||
}
|
||||
|
||||
async fn patch_features(
|
||||
@ -55,26 +59,36 @@ async fn patch_features(
|
||||
req: HttpRequest,
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let features = index_scheduler.features()?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let old_features = features.runtime_features();
|
||||
|
||||
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
|
||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||
export_puffin_reports: new_features
|
||||
.0
|
||||
.export_puffin_reports
|
||||
.unwrap_or(old_features.export_puffin_reports),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
// the it renames to camelCase, which we don't want for analytics.
|
||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
|
||||
new_features;
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
score_details,
|
||||
vector_store,
|
||||
metrics,
|
||||
export_puffin_reports,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
"Experimental features Updated".to_string(),
|
||||
json!({
|
||||
"score_details": score_details,
|
||||
"vector_store": vector_store,
|
||||
"metrics": metrics,
|
||||
"export_puffin_reports": export_puffin_reports,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
@ -68,7 +68,7 @@ pub async fn search(
|
||||
}
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features()?;
|
||||
let features = index_scheduler.features();
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(&index, search_query, facet_query, facet_name, features)
|
||||
})
|
||||
|
@ -157,7 +157,7 @@ pub async fn search_with_url_query(
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features()?;
|
||||
let features = index_scheduler.features();
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
@ -192,7 +192,7 @@ pub async fn search_with_post(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let features = index_scheduler.features()?;
|
||||
let features = index_scheduler.features();
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
|
@ -19,7 +19,7 @@ pub async fn get_metrics(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: Data<AuthController>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features()?.check_metrics()?;
|
||||
index_scheduler.features().check_metrics()?;
|
||||
let auth_filters = index_scheduler.filters();
|
||||
if !auth_filters.all_indexes_authorized() {
|
||||
let mut error = ResponseError::from(AuthenticationError::InvalidToken);
|
||||
|
@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
|
||||
let queries = params.into_inner().queries;
|
||||
|
||||
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
|
||||
let features = index_scheduler.features()?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||
|
@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};
|
||||
|
||||
use ::time::format_description::well_known::Rfc3339;
|
||||
use maplit::{hashmap, hashset};
|
||||
use meilisearch::Opt;
|
||||
use once_cell::sync::Lazy;
|
||||
use tempfile::TempDir;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::common::{Server, Value};
|
||||
use crate::common::{default_settings, Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
|
||||
@ -195,7 +197,9 @@ async fn access_authorized_master_key() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn access_authorized_restricted_index() {
|
||||
let mut server = Server::new_auth().await;
|
||||
let dir = TempDir::new().unwrap();
|
||||
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
|
||||
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
|
||||
for ((method, route), actions) in AUTHORIZATIONS.iter() {
|
||||
for action in actions {
|
||||
// create a new API key letting only the needed action.
|
||||
|
@ -202,6 +202,10 @@ impl Server {
|
||||
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
|
||||
self.service.patch("/experimental-features", value).await
|
||||
}
|
||||
|
||||
pub async fn get_metrics(&self) -> (Value, StatusCode) {
|
||||
self.service.get("/metrics").await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
skip_index_budget: true,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
},
|
||||
experimental_enable_metrics: true,
|
||||
experimental_enable_metrics: false,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
use crate::common::Server;
|
||||
use meilisearch::Opt;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::common::{default_settings, Server};
|
||||
use crate::json;
|
||||
|
||||
/// Feature name to test against.
|
||||
@ -16,7 +19,9 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -26,7 +31,9 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -36,7 +43,9 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -47,7 +56,9 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -58,11 +69,73 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn experimental_feature_metrics() {
|
||||
// instance flag for metrics enables metrics at startup
|
||||
let dir = TempDir::new().unwrap();
|
||||
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
|
||||
let server = Server::new_with_options(enable_metrics).await.unwrap();
|
||||
|
||||
let (response, code) = server.get_features().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": true,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
|
||||
// metrics are not returned in json format
|
||||
// so the test server will return null
|
||||
meili_snap::snapshot!(response, @"null");
|
||||
|
||||
// disabling metrics results in invalid request
|
||||
let (response, code) = server.set_features(json!({"metrics": false})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response["metrics"], @"false");
|
||||
|
||||
let (response, code) = server.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// enabling metrics via HTTP results in valid request
|
||||
let (response, code) = server.set_features(json!({"metrics": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response["metrics"], @"true");
|
||||
|
||||
let (response, code) = server.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response, @"null");
|
||||
|
||||
// startup without flag respects persisted metrics value
|
||||
let disable_metrics =
|
||||
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
|
||||
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
|
||||
let (response, code) = server_no_flag.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response, @"null");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn errors() {
|
||||
let server = Server::new().await;
|
||||
@ -73,7 +146,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
63
meilisearch/tests/search/distinct.rs
Normal file
63
meilisearch/tests/search/distinct.rs
Normal file
@ -0,0 +1,63 @@
|
||||
use meili_snap::snapshot;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{"productId": 1, "shopId": 1},
|
||||
{"productId": 2, "shopId": 1},
|
||||
{"productId": 3, "shopId": 2},
|
||||
{"productId": 4, "shopId": 2},
|
||||
{"productId": 5, "shopId": 3},
|
||||
{"productId": 6, "shopId": 3},
|
||||
{"productId": 7, "shopId": 4},
|
||||
{"productId": 8, "shopId": 4},
|
||||
{"productId": 9, "shopId": 5},
|
||||
{"productId": 10, "shopId": 5}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
#[actix_rt::test]
|
||||
async fn distinct_search_with_offset_no_ranking() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(Value(response): Value) -> Vec<i64> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"1");
|
||||
snapshot!(format!("{:?}", hits), @"[5]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
// This modules contains all the test concerning search. Each particular feature of the search
|
||||
// should be tested in its own module to isolate tests and keep the tests readable.
|
||||
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod facet_search;
|
||||
mod formatted;
|
||||
@ -816,7 +817,7 @@ async fn experimental_feature_score_details() {
|
||||
},
|
||||
"proximity": {
|
||||
"order": 2,
|
||||
"score": 0.875
|
||||
"score": 0.75
|
||||
},
|
||||
"attribute": {
|
||||
"order": 3,
|
||||
|
@ -1,4 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{io, str};
|
||||
|
||||
use obkv::KvReader;
|
||||
@ -19,14 +20,14 @@ use crate::FieldId;
|
||||
pub struct EnrichedDocumentsBatchReader<R> {
|
||||
documents: DocumentsBatchReader<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::ReaderCursor<File>,
|
||||
external_ids: grenad::ReaderCursor<BufReader<File>>,
|
||||
}
|
||||
|
||||
impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
|
||||
pub fn new(
|
||||
documents: DocumentsBatchReader<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::Reader<File>,
|
||||
external_ids: grenad::Reader<BufReader<File>>,
|
||||
) -> Result<Self, Error> {
|
||||
if documents.documents_count() as u64 == external_ids.len() {
|
||||
Ok(EnrichedDocumentsBatchReader {
|
||||
@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
|
||||
pub struct EnrichedDocumentsBatchCursor<R> {
|
||||
documents: DocumentsBatchCursor<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::ReaderCursor<File>,
|
||||
external_ids: grenad::ReaderCursor<BufReader<File>>,
|
||||
}
|
||||
|
||||
impl<R> EnrichedDocumentsBatchCursor<R> {
|
||||
|
@ -2,7 +2,7 @@ use std::cmp;
|
||||
|
||||
use crate::{relative_from_absolute_position, Position};
|
||||
|
||||
pub const MAX_DISTANCE: u32 = 8;
|
||||
pub const MAX_DISTANCE: u32 = 4;
|
||||
|
||||
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
||||
if lhs <= rhs {
|
||||
|
@ -46,18 +46,27 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
if let Some(distinct_fid) = distinct_fid {
|
||||
let mut excluded = RoaringBitmap::new();
|
||||
let mut results = vec![];
|
||||
let mut skip = 0;
|
||||
for docid in universe.iter() {
|
||||
if results.len() >= from + length {
|
||||
if results.len() >= length {
|
||||
break;
|
||||
}
|
||||
if excluded.contains(docid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||
skip += 1;
|
||||
if skip <= from {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push(docid);
|
||||
}
|
||||
|
||||
let mut all_candidates = universe - excluded;
|
||||
all_candidates.extend(results.iter().copied());
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: vec![Default::default(); results.len()],
|
||||
docids: results,
|
||||
|
@ -192,7 +192,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
}
|
||||
|
||||
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
match prefix {
|
||||
Word::Original(prefix) => {
|
||||
let exact = self.get_db_exact_word_prefix_docids(prefix)?;
|
||||
@ -217,7 +216,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
prefix: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
match &self.restricted_fids {
|
||||
Some(restricted_fids) => {
|
||||
let interned = self.word_interner.get(prefix).as_str();
|
||||
@ -246,7 +244,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
prefix: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
@ -300,7 +297,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
prefix2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, prefix2),
|
||||
@ -319,7 +315,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, left_prefix, right),
|
||||
@ -357,7 +352,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word_prefix: Interned<String>,
|
||||
fid: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
// if the requested fid isn't in the restricted list, return None.
|
||||
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||
return Ok(None);
|
||||
@ -399,7 +393,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
}
|
||||
|
||||
pub fn get_db_word_prefix_fids(&mut self, word_prefix: Interned<String>) -> Result<Vec<u16>> {
|
||||
unreachable!();
|
||||
let fids = match self.db_cache.word_prefix_fids.entry(word_prefix) {
|
||||
Entry::Occupied(fids) => fids.get().clone(),
|
||||
Entry::Vacant(entry) => {
|
||||
@ -446,7 +439,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word_prefix: Interned<String>,
|
||||
position: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(word_prefix, position),
|
||||
@ -488,7 +480,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
word_prefix: Interned<String>,
|
||||
) -> Result<Vec<u16>> {
|
||||
unreachable!();
|
||||
let positions = match self.db_cache.word_prefix_positions.entry(word_prefix) {
|
||||
Entry::Occupied(positions) => positions.get().clone(),
|
||||
Entry::Vacant(entry) => {
|
||||
|
@ -1,6 +1,7 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::proximity::MAX_DISTANCE;
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::SearchContext;
|
||||
@ -35,7 +36,7 @@ pub fn build_edges(
|
||||
}
|
||||
|
||||
let mut conditions = vec![];
|
||||
for cost in right_ngram_max..(7 + right_ngram_max) {
|
||||
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
|
||||
conditions.push((
|
||||
cost as u32,
|
||||
conditions_interner.insert(ProximityCondition::Uninit {
|
||||
@ -47,7 +48,7 @@ pub fn build_edges(
|
||||
}
|
||||
|
||||
conditions.push((
|
||||
(7 + right_ngram_max) as u32,
|
||||
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
));
|
||||
|
||||
|
@ -273,7 +273,7 @@ fn test_proximity_simple() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
@ -282,11 +282,11 @@ fn test_proximity_simple() {
|
||||
"\"the quickbrown fox jumps over the lazy dog\"",
|
||||
"\"the really quick brown fox jumps over the lazy dog\"",
|
||||
"\"the really quick brown fox jumps over the very lazy dog\"",
|
||||
"\"brown quick fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy. dog\"",
|
||||
"\"dog the quick brown fox jumps over the lazy\"",
|
||||
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
||||
"\"brown quick fox jumps over the lazy dog\"",
|
||||
"\"the. quick brown fox jumps over the lazy. dog\"",
|
||||
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best s");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
|
||||
"\"summer best\"",
|
||||
"\"this is the best meal of summer\"",
|
||||
"\"summer x best\"",
|
||||
"\"this is the best meal of the summer\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"this is the best meal of the summer\"",
|
||||
"\"summer x y best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best su");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
|
||||
"\"summer best\"",
|
||||
"\"this is the best meal of summer\"",
|
||||
"\"summer x best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"this is the best meal of the summer\"",
|
||||
"\"summer x y best\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best wint");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
|
||||
"\"winter best\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"winter x best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"winter x y best\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best wi");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
|
||||
"\"winter best\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"winter x best\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"winter x y best\"",
|
||||
]
|
||||
"###);
|
||||
|
@ -68,8 +68,8 @@ fn test_trap_basic() {
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
@ -82,8 +82,8 @@ fn test_trap_basic() {
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 35,
|
||||
max_rank: 57,
|
||||
rank: 9,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 35,
|
||||
max_rank: 57,
|
||||
rank: 9,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 35,
|
||||
max_rank: 57,
|
||||
rank: 9,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 35,
|
||||
max_rank: 57,
|
||||
rank: 9,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 21,
|
||||
max_rank: 22,
|
||||
rank: 9,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 17,
|
||||
max_rank: 22,
|
||||
rank: 5,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 17,
|
||||
max_rank: 22,
|
||||
rank: 5,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
|
||||
expression: "format!(\"{document_scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -63,7 +39,31 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -63,7 +55,15 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,40 +6,32 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -47,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -55,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -63,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -71,7 +63,15 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -3,59 +3,35 @@ source: milli/src/search/new/tests/proximity.rs
|
||||
expression: "format!(\"{document_scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -63,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -71,7 +47,31 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -7,7 +7,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -15,7 +15,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -47,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -55,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -63,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 8,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 15,
|
||||
rank: 4,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 50,
|
||||
max_rank: 50,
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -24,132 +24,6 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 50,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 49,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 49,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 48,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 41,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 40,
|
||||
max_rank: 50,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 8,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 43,
|
||||
max_rank: 43,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 7,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 36,
|
||||
max_rank: 36,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 7,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 31,
|
||||
max_rank: 36,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 5,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
@ -160,14 +34,126 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 4,
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 21,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 21,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 20,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 17,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 16,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 8,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 7,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 16,
|
||||
max_rank: 16,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 7,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 13,
|
||||
max_rank: 16,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 5,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -180,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -194,8 +180,22 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 4,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 43,
|
||||
max_rank: 43,
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 43,
|
||||
max_rank: 43,
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 42,
|
||||
max_rank: 43,
|
||||
rank: 18,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 42,
|
||||
max_rank: 43,
|
||||
rank: 18,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 41,
|
||||
max_rank: 43,
|
||||
rank: 17,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 34,
|
||||
max_rank: 43,
|
||||
rank: 14,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 33,
|
||||
max_rank: 43,
|
||||
rank: 13,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 36,
|
||||
max_rank: 36,
|
||||
rank: 16,
|
||||
max_rank: 16,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 29,
|
||||
max_rank: 29,
|
||||
rank: 13,
|
||||
max_rank: 13,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 24,
|
||||
max_rank: 29,
|
||||
rank: 10,
|
||||
max_rank: 13,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 55,
|
||||
max_rank: 57,
|
||||
rank: 23,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 54,
|
||||
max_rank: 57,
|
||||
rank: 22,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 53,
|
||||
max_rank: 57,
|
||||
rank: 21,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 52,
|
||||
max_rank: 57,
|
||||
rank: 20,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 51,
|
||||
max_rank: 57,
|
||||
rank: 20,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 48,
|
||||
max_rank: 57,
|
||||
rank: 19,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 47,
|
||||
max_rank: 57,
|
||||
rank: 19,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 57,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 50,
|
||||
max_rank: 50,
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 43,
|
||||
max_rank: 43,
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 38,
|
||||
max_rank: 43,
|
||||
rank: 16,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 29,
|
||||
max_rank: 29,
|
||||
rank: 13,
|
||||
max_rank: 13,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 22,
|
||||
max_rank: 22,
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 15,
|
||||
rank: 7,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 55,
|
||||
max_rank: 57,
|
||||
rank: 23,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 54,
|
||||
max_rank: 57,
|
||||
rank: 22,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 54,
|
||||
max_rank: 57,
|
||||
rank: 22,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 54,
|
||||
max_rank: 57,
|
||||
rank: 22,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 53,
|
||||
max_rank: 57,
|
||||
rank: 21,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 53,
|
||||
max_rank: 57,
|
||||
rank: 21,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 52,
|
||||
max_rank: 57,
|
||||
rank: 20,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 47,
|
||||
max_rank: 57,
|
||||
rank: 18,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 45,
|
||||
max_rank: 57,
|
||||
rank: 18,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 57,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 47,
|
||||
max_rank: 50,
|
||||
rank: 19,
|
||||
max_rank: 22,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 40,
|
||||
max_rank: 43,
|
||||
rank: 16,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 35,
|
||||
max_rank: 43,
|
||||
rank: 13,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -222,8 +222,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 26,
|
||||
max_rank: 29,
|
||||
rank: 10,
|
||||
max_rank: 13,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -236,8 +236,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 22,
|
||||
rank: 7,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -250,8 +250,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 22,
|
||||
rank: 7,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -264,8 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 22,
|
||||
rank: 7,
|
||||
max_rank: 10,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 13,
|
||||
max_rank: 15,
|
||||
rank: 5,
|
||||
max_rank: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 57,
|
||||
max_rank: 57,
|
||||
rank: 25,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 56,
|
||||
max_rank: 57,
|
||||
rank: 24,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 55,
|
||||
max_rank: 57,
|
||||
rank: 23,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 54,
|
||||
max_rank: 57,
|
||||
rank: 22,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 53,
|
||||
max_rank: 57,
|
||||
rank: 21,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 52,
|
||||
max_rank: 57,
|
||||
rank: 20,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 51,
|
||||
max_rank: 57,
|
||||
rank: 20,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 48,
|
||||
max_rank: 57,
|
||||
rank: 19,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 47,
|
||||
max_rank: 57,
|
||||
rank: 19,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 57,
|
||||
max_rank: 25,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
Fid(
|
||||
@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
|
||||
),
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 8,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
Fid(
|
||||
|
@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
|
||||
// 7 is better than 6 because of the proximity between "the" and its surrounding terms
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
|
||||
// 10 is better than 9 because of the proximity between "quick" and "brown"
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
@ -427,7 +427,7 @@ fn test_words_tms_all() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
@ -439,10 +439,10 @@ fn test_words_tms_all() {
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
]
|
||||
"###);
|
||||
|
@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
self.delete_document(docid);
|
||||
Some(docid)
|
||||
}
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
||||
self.execute_inner()?;
|
||||
|
||||
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
|
||||
}
|
||||
|
||||
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
|
||||
// We retrieve the current documents ids that are in the database.
|
||||
@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
|
||||
+ for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
let previous_len = docids.len();
|
||||
@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids(
|
||||
db: &Database<Str, RoaringBitmapCodec>,
|
||||
to_remove: &RoaringBitmap,
|
||||
) -> Result<fst::Set<Vec<u8>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut prefixes_to_delete = fst::SetBuilder::memory();
|
||||
|
||||
// We iterate over the word prefix docids database and remove the deleted documents ids
|
||||
@ -528,6 +534,8 @@ fn remove_from_word_docids(
|
||||
words_to_keep: &mut BTreeSet<String>,
|
||||
words_to_remove: &mut BTreeSet<String>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// We create an iterator to be able to get the content and delete the word docids.
|
||||
// It's faster to acquire a cursor to get and delete or put, as we avoid traversing
|
||||
// the LMDB B-Tree two times but only once.
|
||||
@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value(
|
||||
field_id: FieldId,
|
||||
to_remove: &RoaringBitmap,
|
||||
) -> heed::Result<HashSet<Vec<u8>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let db = match facet_type {
|
||||
FacetType::String => {
|
||||
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>(
|
||||
where
|
||||
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, mut docids) = result?;
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
@ -30,7 +31,7 @@ pub struct FacetsUpdateBulk<'i> {
|
||||
facet_type: FacetType,
|
||||
field_ids: Vec<FieldId>,
|
||||
// None if level 0 does not need to be updated
|
||||
new_data: Option<grenad::Reader<File>>,
|
||||
new_data: Option<grenad::Reader<BufReader<File>>>,
|
||||
}
|
||||
|
||||
impl<'i> FacetsUpdateBulk<'i> {
|
||||
@ -38,7 +39,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
index: &'i Index,
|
||||
field_ids: Vec<FieldId>,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<File>,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
group_size: u8,
|
||||
min_level_size: u8,
|
||||
) -> FacetsUpdateBulk<'i> {
|
||||
@ -187,7 +188,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
txn: &RoTxn,
|
||||
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
|
||||
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
|
||||
let mut all_docids = RoaringBitmap::new();
|
||||
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
|
||||
for bitmap in bitmaps {
|
||||
@ -259,7 +260,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
field_id: u16,
|
||||
level: u8,
|
||||
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
|
||||
) -> Result<Vec<grenad::Reader<File>>> {
|
||||
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
|
||||
if level == 0 {
|
||||
self.read_level_0(rtxn, field_id, handle_group)?;
|
||||
// Level 0 is already in the database
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||
@ -34,14 +35,14 @@ pub struct FacetsUpdateIncremental<'i> {
|
||||
index: &'i Index,
|
||||
inner: FacetsUpdateIncrementalInner,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<File>,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
}
|
||||
|
||||
impl<'i> FacetsUpdateIncremental<'i> {
|
||||
pub fn new(
|
||||
index: &'i Index,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<File>,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
group_size: u8,
|
||||
min_level_size: u8,
|
||||
max_group_size: u8,
|
||||
|
@ -78,6 +78,7 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use charabia::normalizer::{Normalize, NormalizerOption};
|
||||
@ -108,13 +109,17 @@ pub struct FacetsUpdate<'i> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<File>,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
group_size: u8,
|
||||
max_group_size: u8,
|
||||
min_level_size: u8,
|
||||
}
|
||||
impl<'i> FacetsUpdate<'i> {
|
||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||
pub fn new(
|
||||
index: &'i Index,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
) -> Self {
|
||||
let database = match facet_type {
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::io::{Read, Seek};
|
||||
use std::io::{BufWriter, Read, Seek};
|
||||
use std::result::Result as StdResult;
|
||||
use std::{fmt, iter};
|
||||
|
||||
@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
|
||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
|
||||
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
|
||||
|
||||
// The primary key *field id* that has already been set for this index or the one
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{io, mem, str};
|
||||
|
||||
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
|
||||
@ -31,7 +32,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
@ -19,7 +19,7 @@ use crate::Result;
|
||||
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use heed::BytesEncode;
|
||||
|
||||
@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
|
||||
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
use std::mem::size_of;
|
||||
|
||||
use heed::zerocopy::AsBytes;
|
||||
@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET
|
||||
|
||||
/// The extracted facet values stored in grenad files by type.
|
||||
pub struct ExtractedFacetValues {
|
||||
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
|
||||
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
|
||||
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
}
|
||||
|
||||
/// Extracts the facet values of each faceted field of each document.
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use grenad::Sorter;
|
||||
|
||||
@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
|
||||
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use concat_arrays::concat_arrays;
|
||||
use serde_json::Value;
|
||||
@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::convert::TryFrom;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use bytemuck::cast_slice;
|
||||
use serde_json::{from_slice, Value};
|
||||
@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
vectors_fid: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
exact_attributes: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
|
||||
@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
|
||||
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{cmp, io, mem, str, vec};
|
||||
|
||||
use super::helpers::{
|
||||
@ -20,7 +21,7 @@ use crate::{DocumentId, Result};
|
||||
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
|
||||
@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
|
||||
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
@ -12,6 +12,7 @@ mod extract_word_position_docids;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use log::debug;
|
||||
@ -39,8 +40,8 @@ use crate::{FieldId, Result};
|
||||
/// Send data in grenad file over provided Sender.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn data_from_obkv_documents(
|
||||
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
|
||||
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
|
||||
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: Option<HashSet<FieldId>>,
|
||||
@ -152,7 +153,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
});
|
||||
}
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -162,7 +163,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-pair-proximity-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -172,7 +173,11 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"field-id-wordcount-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
|
||||
spawn_extraction_task::<
|
||||
_,
|
||||
_,
|
||||
Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
|
||||
>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -185,7 +190,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -194,7 +199,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
TypedChunk::WordPositionDocids,
|
||||
"word-position-docids",
|
||||
);
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -204,7 +209,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-fid-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_fid_facet_strings_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@ -214,7 +219,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"field-id-facet-string-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_fid_facet_numbers_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx,
|
||||
@ -269,7 +274,7 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
/// Extract chunked data and send it into lmdb_writer_sx sender:
|
||||
/// - documents
|
||||
fn send_original_documents_data(
|
||||
original_documents_chunk: Result<grenad::Reader<File>>,
|
||||
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
@ -311,7 +316,7 @@ fn send_original_documents_data(
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk: Result<grenad::Reader<File>>,
|
||||
flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
@ -328,7 +333,10 @@ fn send_and_extract_flattened_documents_data(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
|
||||
(
|
||||
grenad::Reader<BufReader<File>>,
|
||||
(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
|
||||
),
|
||||
),
|
||||
),
|
||||
)> {
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek};
|
||||
use std::io::{self, BufReader, BufWriter, Seek};
|
||||
use std::time::Instant;
|
||||
|
||||
use grenad::{CompressionType, Sorter};
|
||||
@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
|
||||
typ: grenad::CompressionType,
|
||||
level: Option<u32>,
|
||||
file: R,
|
||||
) -> grenad::Writer<R> {
|
||||
) -> grenad::Writer<BufWriter<R>> {
|
||||
let mut builder = grenad::Writer::builder();
|
||||
builder.compression_type(typ);
|
||||
if let Some(level) = level {
|
||||
builder.compression_level(level);
|
||||
}
|
||||
builder.build(file)
|
||||
builder.build(BufWriter::new(file))
|
||||
}
|
||||
|
||||
pub fn create_sorter(
|
||||
@ -53,7 +53,7 @@ pub fn create_sorter(
|
||||
pub fn sorter_into_reader(
|
||||
sorter: grenad::Sorter<MergeFn>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
@ -64,16 +64,18 @@ pub fn sorter_into_reader(
|
||||
writer_into_reader(writer)
|
||||
}
|
||||
|
||||
pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
|
||||
let mut file = writer.into_inner()?;
|
||||
pub fn writer_into_reader(
|
||||
writer: grenad::Writer<BufWriter<File>>,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
|
||||
file.rewind()?;
|
||||
grenad::Reader::new(file).map_err(Into::into)
|
||||
grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub unsafe fn as_cloneable_grenad(
|
||||
reader: &grenad::Reader<File>,
|
||||
reader: &grenad::Reader<BufReader<File>>,
|
||||
) -> Result<grenad::Reader<CursorClonableMmap>> {
|
||||
let file = reader.get_ref();
|
||||
let file = reader.get_ref().get_ref();
|
||||
let mmap = memmap2::Mmap::map(file)?;
|
||||
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
|
||||
let reader = grenad::Reader::new(cursor)?;
|
||||
@ -89,8 +91,8 @@ where
|
||||
fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
|
||||
}
|
||||
|
||||
impl MergeableReader for Vec<grenad::Reader<File>> {
|
||||
type Output = grenad::Reader<File>;
|
||||
impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
|
||||
type Output = grenad::Reader<BufReader<File>>;
|
||||
|
||||
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
|
||||
let mut merger = MergerBuilder::new(merge_fn);
|
||||
@ -99,8 +101,8 @@ impl MergeableReader for Vec<grenad::Reader<File>> {
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
type Output = (grenad::Reader<File>, grenad::Reader<File>);
|
||||
impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
|
||||
|
||||
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
|
||||
let mut m1 = MergerBuilder::new(merge_fn);
|
||||
@ -125,7 +127,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
|
||||
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let merger = self.0.build();
|
||||
let mut writer = create_writer(
|
||||
params.chunk_compression_type,
|
||||
@ -176,7 +178,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
|
||||
reader: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
documents_chunk_size: usize,
|
||||
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
|
||||
) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
|
||||
let mut continue_reading = true;
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
|
||||
|
@ -470,13 +470,13 @@ where
|
||||
let all_documents_ids = index_documents_ids | new_documents_ids;
|
||||
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
||||
|
||||
// self.execute_prefix_databases(
|
||||
// word_docids,
|
||||
// exact_word_docids,
|
||||
// word_pair_proximity_docids,
|
||||
// word_position_docids,
|
||||
// word_fid_docids,
|
||||
// )?;
|
||||
self.execute_prefix_databases(
|
||||
word_docids,
|
||||
exact_word_docids,
|
||||
word_pair_proximity_docids,
|
||||
word_position_docids,
|
||||
word_fid_docids,
|
||||
)?;
|
||||
|
||||
Ok(all_documents_ids.len())
|
||||
}
|
||||
|
@ -659,8 +659,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
new_documents_ids: self.new_documents_ids,
|
||||
replaced_documents_ids: self.replaced_documents_ids,
|
||||
documents_count: self.documents_count,
|
||||
original_documents,
|
||||
flattened_documents,
|
||||
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
})
|
||||
}
|
||||
|
||||
@ -779,8 +781,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
new_documents_ids: documents_ids,
|
||||
replaced_documents_ids: RoaringBitmap::default(),
|
||||
documents_count,
|
||||
original_documents,
|
||||
flattened_documents,
|
||||
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
};
|
||||
|
||||
let new_facets = output.compute_real_facets(wtxn, self.index)?;
|
||||
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use bytemuck::allocation::pod_collect_to_vec;
|
||||
use charabia::{Language, Script};
|
||||
@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
|
||||
Documents(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdWordcountDocids(grenad::Reader<File>),
|
||||
FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
|
||||
NewDocumentsIds(RoaringBitmap),
|
||||
WordDocids {
|
||||
word_docids_reader: grenad::Reader<File>,
|
||||
exact_word_docids_reader: grenad::Reader<File>,
|
||||
word_docids_reader: grenad::Reader<BufReader<File>>,
|
||||
exact_word_docids_reader: grenad::Reader<BufReader<File>>,
|
||||
},
|
||||
WordPositionDocids(grenad::Reader<File>),
|
||||
WordFidDocids(grenad::Reader<File>),
|
||||
WordPairProximityDocids(grenad::Reader<File>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<File>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||
FieldIdFacetExistsDocids(grenad::Reader<File>),
|
||||
FieldIdFacetIsNullDocids(grenad::Reader<File>),
|
||||
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
|
||||
GeoPoints(grenad::Reader<File>),
|
||||
VectorPoints(grenad::Reader<File>),
|
||||
WordPositionDocids(grenad::Reader<BufReader<File>>),
|
||||
WordFidDocids(grenad::Reader<BufReader<File>>),
|
||||
WordPairProximityDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
|
||||
GeoPoints(grenad::Reader<BufReader<File>>),
|
||||
VectorPoints(grenad::Reader<BufReader<File>>),
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::io::BufReader;
|
||||
use std::io::{BufReader, BufWriter};
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
@ -119,9 +119,9 @@ pub fn insert_into_database(
|
||||
pub fn write_into_lmdb_database_without_merging(
|
||||
wtxn: &mut heed::RwTxn,
|
||||
database: heed::PolyDatabase,
|
||||
writer: grenad::Writer<std::fs::File>,
|
||||
writer: grenad::Writer<BufWriter<std::fs::File>>,
|
||||
) -> Result<()> {
|
||||
let file = writer.into_inner()?;
|
||||
let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
|
||||
let reader = grenad::Reader::new(BufReader::new(file))?;
|
||||
if database.is_empty(wtxn)? {
|
||||
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
||||
|
@ -20,7 +20,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [100, ]
|
||||
3 rings a [101, ]
|
||||
3 the a [101, ]
|
||||
4 at b [100, ]
|
||||
4 at be [100, ]
|
||||
4 bell a [101, ]
|
||||
|
||||
|
@ -30,10 +30,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 bell 5 [101, ]
|
||||
3 rings am [101, ]
|
||||
3 the at [101, ]
|
||||
4 an house [100, ]
|
||||
4 at beautiful [100, ]
|
||||
4 bell am [101, ]
|
||||
4 the 5 [101, ]
|
||||
5 at house [100, ]
|
||||
5 the am [101, ]
|
||||
|
||||
|
@ -28,8 +28,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 rings a [101, ]
|
||||
3 rings am [101, ]
|
||||
3 the a [101, ]
|
||||
4 at b [100, ]
|
||||
4 at be [100, ]
|
||||
4 bell a [101, ]
|
||||
4 bell am [101, ]
|
||||
|
||||
|
@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
2 bell a [51, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [50, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -7,5 +7,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
2 bell a [51, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [50, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [50, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -16,6 +16,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [50, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 at b [50, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -12,5 +12,4 @@ source: milli/src/update/prefix_word_pairs/mod.rs
|
||||
3 at a [50, ]
|
||||
3 rings a [51, ]
|
||||
3 the a [51, ]
|
||||
4 bell a [51, ]
|
||||
|
||||
|
@ -8,7 +8,7 @@ use Criterion::*;
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
macro_rules! test_distinct {
|
||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
|
||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
|
||||
#[test]
|
||||
fn $func() {
|
||||
let criteria = $criteria;
|
||||
@ -27,6 +27,7 @@ macro_rules! test_distinct {
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query(search::TEST_QUERY);
|
||||
search.limit($limit);
|
||||
search.offset($offset);
|
||||
search.exhaustive_number_hits($exhaustive);
|
||||
|
||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
@ -47,6 +48,7 @@ macro_rules! test_distinct {
|
||||
Some(d.id)
|
||||
}
|
||||
})
|
||||
.skip($offset)
|
||||
.take($limit)
|
||||
.collect();
|
||||
|
||||
@ -61,6 +63,7 @@ test_distinct!(
|
||||
tag,
|
||||
true,
|
||||
1,
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
@ -69,6 +72,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
true,
|
||||
1,
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
@ -77,6 +81,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
true,
|
||||
0,
|
||||
0,
|
||||
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
|
||||
7
|
||||
);
|
||||
@ -86,6 +91,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
@ -94,6 +100,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
@ -102,6 +109,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words],
|
||||
3
|
||||
);
|
||||
@ -110,6 +118,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words],
|
||||
7
|
||||
);
|
||||
@ -118,6 +127,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo],
|
||||
3
|
||||
);
|
||||
@ -126,6 +136,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo],
|
||||
7
|
||||
);
|
||||
@ -134,6 +145,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Proximity],
|
||||
3
|
||||
);
|
||||
@ -142,6 +154,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Proximity],
|
||||
7
|
||||
);
|
||||
@ -150,6 +163,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Attribute],
|
||||
3
|
||||
);
|
||||
@ -158,6 +172,7 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Attribute],
|
||||
7
|
||||
);
|
||||
@ -166,6 +181,7 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Exactness],
|
||||
3
|
||||
);
|
||||
@ -174,6 +190,47 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Exactness],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
distinct_string_limit_and_offset,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
1,
|
||||
vec![],
|
||||
2
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
exhaustive_distinct_string_limit_and_offset,
|
||||
tag,
|
||||
true,
|
||||
1,
|
||||
2,
|
||||
vec![],
|
||||
1
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
distinct_number_limit_and_offset,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
2,
|
||||
vec![],
|
||||
5
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
exhaustive_distinct_number_limit_and_offset,
|
||||
asc_desc_rank,
|
||||
true,
|
||||
2,
|
||||
4,
|
||||
vec![],
|
||||
3
|
||||
);
|
||||
|
Reference in New Issue
Block a user