mirror of https://github.com/meilisearch/meilisearch.git
synced 2025-11-26 07:40:31 +00:00

Compare commits: 55 commits (v1.7.0-rc. ... prototype-)
| Author | SHA1 | Date |
|---|---|---|
|  | b7ed3308bb |  |
|  | 579a96adc7 |  |
|  | e6ff45e3b9 |  |
|  | e4f8ee00c8 |  |
|  | d2f77e88bd |  |
|  | 1d8c13f595 |  |
|  | 7f3c495f5c |  |
|  | ca4876fd10 |  |
|  | ee3076d5ba |  |
|  | ab1224bfa7 |  |
|  | eefc1c421e |  |
|  | 4d42a7af7c |  |
|  | 7408db2a46 |  |
|  | 663629a9d6 |  |
|  | 15c38dca78 |  |
|  | 7ee20b0895 |  |
|  | 0c216048b5 |  |
|  | 36d17110d8 |  |
|  | bdd428c22e |  |
|  | b130917933 |  |
|  | 25f64ce7df |  |
|  | adcd848809 |  |
|  | eee46b7537 |  |
|  | 55f60a3638 |  |
|  | c608b3f9b5 |  |
|  | 86ce843f3d |  |
|  | b11df7ec34 |  |
|  | 6862caef64 |  |
|  | f75c7ac979 |  |
|  | eada6de261 |  |
|  | f4a6261dea |  |
|  | 9806a3e5f6 |  |
|  | a96b45dda7 |  |
|  | 452a343a2b |  |
|  | b87485e80d |  |
|  | 147a67dc82 |  |
|  | 716ffc07ee |  |
|  | b005eb3289 |  |
|  | 9e664d87eb |  |
|  | 6dcb5219a0 |  |
|  | 5e83bac448 |  |
|  | 0562818c2a |  |
|  | a478392b7a |  |
|  | bbf3fb88ca |  |
|  | 60510e037b |  |
|  | 36c27a18a1 |  |
|  | 1eb1c043b5 |  |
|  | 507739bd98 |  |
|  | eb25b07390 |  |
|  | 066a7a3cde |  |
|  | 55796406c5 |  |
|  | 91cdd502f8 |  |
|  | a493a50825 |  |
|  | 9d1f489a37 |  |
|  | 865b415b3f |  |
@@ -1,2 +1,2 @@
 [alias]
-xtask = "run --package xtask --"
+xtask = "run --release --package xtask --"
30 .github/workflows/bench-manual.yml vendored Normal file
@@ -0,0 +1,30 @@
+name: Bench (manual)
+
+on:
+  workflow_dispatch:
+    inputs:
+      workload:
+        description: 'The path to the workloads to execute (workloads/...)'
+        required: true
+        default: 'workloads/movies.json'
+
+env:
+  WORKLOAD_NAME: ${{ github.event.inputs.workload }}
+
+jobs:
+  benchmarks:
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}
+
46 .github/workflows/bench-pr.yml vendored Normal file
@@ -0,0 +1,46 @@
+name: Bench (PR)
+on:
+  issue_comment:
+    types: [created]
+
+permissions:
+  issues: write
+
+env:
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    if: startsWith(github.event.comment.body, '/bench')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: bench
+          reaction-type: "rocket"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Run benchmarks on PR ${{ github.event.issue.id }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}
25 .github/workflows/bench-push-indexing.yml vendored Normal file
@@ -0,0 +1,25 @@
+name: Indexing bench (push)
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  benchmarks:
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on `main` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json
+
7 .github/workflows/test-suite.yml vendored
@@ -31,17 +31,10 @@ jobs:
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - name: Setup test with Rust stable
        if: github.event_name != 'schedule'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Setup test with Rust nightly
        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
          override: true
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo check without any default features
2 .gitignore vendored
@@ -9,6 +9,8 @@
 /data.ms
 /snapshots
 /dumps
+/bench
+/_xtask_benchmark.ms

 # Snapshots
 ## ... large
268 Cargo.lock generated
@@ -356,9 +356,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.79"
+version = "1.0.80"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
+checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
 dependencies = [
  "backtrace",
 ]
@@ -440,6 +440,12 @@ dependencies = [
  "syn 2.0.48",
 ]

+[[package]]
+name = "atomic"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
+
 [[package]]
 name = "atomic-polyfill"
 version = "0.1.11"
@@ -490,7 +496,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

 [[package]]
 name = "benchmarks"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "anyhow",
  "bytes",
@@ -622,6 +628,15 @@ dependencies = [
  "serde",
 ]

+[[package]]
+name = "build-info"
+version = "1.7.1"
+dependencies = [
+ "anyhow",
+ "time",
+ "vergen-git2",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.13.0"
@@ -1342,7 +1357,16 @@ version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
 dependencies = [
- "derive_builder_macro",
+ "derive_builder_macro 0.12.0",
 ]

+[[package]]
+name = "derive_builder"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f59169f400d8087f238c5c0c7db6a28af18681717f3b623227d92f397e938c7"
+dependencies = [
+ "derive_builder_macro 0.13.1",
+]
+
 [[package]]
@@ -1357,13 +1381,35 @@ dependencies = [
  "syn 1.0.109",
 ]

+[[package]]
+name = "derive_builder_core"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4ec317cc3e7ef0928b0ca6e4a634a4d6c001672ae210438cf114a83e56b018d"
+dependencies = [
+ "darling 0.14.4",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "derive_builder_macro"
 version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
 dependencies = [
- "derive_builder_core",
+ "derive_builder_core 0.12.0",
  "syn 1.0.109",
 ]

+[[package]]
+name = "derive_builder_macro"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "870368c3fb35b8031abb378861d4460f573b92238ec2152c927a21f77e3e0127"
+dependencies = [
+ "derive_builder_core 0.13.1",
+ "syn 1.0.109",
+]
+
@@ -1485,7 +1531,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1723,11 +1769,12 @@ dependencies = [

 [[package]]
 name = "file-store"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "faux",
  "tempfile",
  "thiserror",
+ "tracing",
  "uuid",
 ]

@@ -1745,7 +1792,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "insta",
  "nom",
@@ -1765,7 +1812,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1883,7 +1930,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2081,11 +2128,11 @@ checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"

 [[package]]
 name = "git2"
-version = "0.16.1"
+version = "0.18.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc"
+checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.4.1",
  "libc",
  "libgit2-sys",
  "log",
@@ -2101,8 +2148,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 [[package]]
 name = "grenad"
 version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a007932af5475ebb5c63bef8812bb1c36f317983bb4ca663e9d6dd58d6a0f8c"
+source = "git+https://github.com/meilisearch/grenad.git?branch=keep-source-index-in-merger#5a7c10fcd689f5967a8979f6b66da1e0939439ff"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -2375,14 +2421,14 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "anyhow",
  "big_s",
  "bincode",
  "crossbeam",
  "csv",
- "derive_builder",
+ "derive_builder 0.12.0",
  "dump",
  "enum-iterator",
  "file-store",
@@ -2393,6 +2439,7 @@ dependencies = [
  "meilisearch-types",
  "page_size 0.5.0",
  "puffin",
+ "rayon",
  "roaring",
  "serde",
  "serde_json",
@@ -2498,7 +2545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
 dependencies = [
  "hermit-abi",
- "rustix 0.38.26",
+ "rustix 0.38.31",
  "windows-sys 0.52.0",
 ]

@@ -2561,7 +2608,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2620,15 +2667,15 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.150"
+version = "0.2.153"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"

 [[package]]
 name = "libgit2-sys"
-version = "0.14.2+1.5.1"
+version = "0.16.2+1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
+checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
 dependencies = [
  "cc",
  "libc",
@@ -2675,9 +2722,9 @@ dependencies = [

 [[package]]
 name = "libz-sys"
-version = "1.1.12"
+version = "1.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b"
+checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6"
 dependencies = [
  "cc",
  "libc",
@@ -3021,28 +3068,6 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"

-[[package]]
-name = "logging_timer"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64e96f261d684b7089aa576bb74e823241dccd994b27d30fabf1dcb3af284fe9"
-dependencies = [
- "log",
- "logging_timer_proc_macros",
-]
-
-[[package]]
-name = "logging_timer_proc_macros"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81"
-dependencies = [
- "log",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "lz4_flex"
 version = "0.10.0"
@@ -3091,7 +3116,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "insta",
  "md5",
@@ -3100,7 +3125,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3114,6 +3139,7 @@ dependencies = [
  "async-trait",
  "brotli",
  "bstr",
+ "build-info",
  "byte-unit",
  "bytes",
  "cargo_toml",
@@ -3185,7 +3211,6 @@ dependencies = [
  "url",
  "urlencoding",
  "uuid",
- "vergen",
  "walkdir",
  "yaup",
  "zip",
@@ -3193,7 +3218,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "base64 0.21.7",
  "enum-iterator",
@@ -3212,7 +3237,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3242,7 +3267,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -3281,7 +3306,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "arroy",
  "big_s",
@@ -3314,7 +3339,6 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
- "logging_timer",
  "maplit",
  "md5",
  "meili-snap",
@@ -3486,6 +3510,12 @@ dependencies = [
  "num-traits",
 ]

+[[package]]
+name = "num-conv"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+
 [[package]]
 name = "num-integer"
 version = "0.1.45"
@@ -3516,6 +3546,15 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "num_threads"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "number_prefix"
 version = "0.4.0"
@@ -3708,7 +3747,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4077,9 +4116,9 @@ dependencies = [

 [[package]]
 name = "rayon"
-version = "1.8.0"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
+checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051"
 dependencies = [
  "either",
  "rayon-core",
@@ -4098,9 +4137,9 @@ dependencies = [

 [[package]]
 name = "rayon-core"
-version = "1.12.0"
+version = "1.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-utils",
@@ -4130,15 +4169,6 @@ dependencies = [
  "bitflags 1.3.2",
 ]

-[[package]]
-name = "redox_syscall"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
-dependencies = [
- "bitflags 1.3.2",
-]
-
 [[package]]
 name = "redox_users"
 version = "0.4.3"
@@ -4216,10 +4246,12 @@ dependencies = [
  "system-configuration",
  "tokio",
  "tokio-rustls 0.24.1",
+ "tokio-util",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
+ "wasm-streams",
  "web-sys",
  "webpki-roots 0.25.3",
  "winreg",
@@ -4327,9 +4359,9 @@ dependencies = [

 [[package]]
 name = "rustix"
-version = "0.38.26"
+version = "0.38.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
+checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
 dependencies = [
  "bitflags 2.4.1",
  "errno",
@@ -4865,14 +4897,13 @@ dependencies = [

 [[package]]
 name = "tempfile"
-version = "3.9.0"
+version = "3.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
+checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
 dependencies = [
  "cfg-if",
  "fastrand",
- "redox_syscall 0.4.1",
- "rustix 0.38.26",
+ "rustix 0.38.31",
  "windows-sys 0.52.0",
 ]

@@ -4932,12 +4963,15 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.31"
+version = "0.3.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e"
+checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
 dependencies = [
  "deranged",
  "itoa",
+ "libc",
+ "num-conv",
+ "num_threads",
  "powerfmt",
  "serde",
  "time-core",
@@ -4952,10 +4986,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"

 [[package]]
 name = "time-macros"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f"
+checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
 dependencies = [
+ "num-conv",
  "time-core",
 ]

@@ -4990,7 +5025,7 @@ version = "0.14.1"
 source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
 dependencies = [
  "aho-corasick",
- "derive_builder",
+ "derive_builder 0.12.0",
  "esaxx-rs",
  "getrandom",
  "itertools 0.11.0",
@@ -5393,10 +5428,11 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"

 [[package]]
 name = "uuid"
-version = "1.6.1"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
+checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
 dependencies = [
+ "atomic",
  "getrandom",
  "serde",
 ]
@@ -5415,18 +5451,42 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

 [[package]]
 name = "vergen"
-version = "7.5.1"
+version = "9.0.0-beta.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749"
+checksum = "107dc53b443fe8cc380798abb75ad6b7038281165109afea1f1b28bb47047ed5"
 dependencies = [
  "anyhow",
- "cfg-if",
- "enum-iterator",
+ "derive_builder 0.13.1",
  "getset",
  "rustversion",
+ "vergen-lib",
 ]

+[[package]]
+name = "vergen-git2"
+version = "1.0.0-beta.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8875c5d71074bb67118774e3d795ab6fe77c3ae3161cb54e19104cabc49487f1"
+dependencies = [
+ "anyhow",
+ "derive_builder 0.13.1",
+ "git2",
+ "rustversion",
+ "thiserror",
+ "time",
+ "vergen",
+ "vergen-lib",
+]
+
+[[package]]
+name = "vergen-lib"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26ebfba72ba904559f25f41ea1512335b5a46459084258cea0857549d9645187"
+dependencies = [
+ "anyhow",
+ "derive_builder 0.13.1",
+ "getset",
+ "rustversion",
+]
+
@@ -5537,6 +5597,19 @@ version = "0.2.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"

+[[package]]
+name = "wasm-streams"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
 [[package]]
 name = "wav"
 version = "1.0.0"
@@ -5841,9 +5914,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

 [[package]]
 name = "winnow"
-version = "0.5.4"
+version = "0.5.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acaaa1190073b2b101e15083c38ee8ec891b5e05cbee516521e94ec008f61e64"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
 dependencies = [
  "memchr",
 ]
@@ -5869,10 +5942,25 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.7.0"
+version = "1.7.1"
 dependencies = [
  "anyhow",
+ "build-info",
  "cargo_metadata",
  "clap",
+ "futures-core",
+ "futures-util",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2",
+ "sysinfo",
+ "time",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+ "tracing-trace",
+ "uuid",
 ]

 [[package]]
@@ -17,11 +17,11 @@ members = [
     "benchmarks",
     "fuzzers",
     "tracing-trace",
-    "xtask",
+    "xtask", "build-info",
 ]

 [workspace.package]
-version = "1.7.0"
+version = "1.7.1"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
@@ -8,7 +8,7 @@ WORKDIR /
 ARG COMMIT_SHA
 ARG COMMIT_DATE
 ARG GIT_TAG
-ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
+ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"

 COPY . .
18 build-info/Cargo.toml Normal file
@@ -0,0 +1,18 @@
+[package]
+name = "build-info"
+version.workspace = true
+authors.workspace = true
+description.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+time = { version = "0.3.34", features = ["parsing"] }
+
+[build-dependencies]
+anyhow = "1.0.80"
+vergen-git2 = "1.0.0-beta.2"
22 build-info/build.rs Normal file
@@ -0,0 +1,22 @@
+fn main() {
+    if let Err(err) = emit_git_variables() {
+        println!("cargo:warning=vergen: {}", err);
+    }
+}
+
+fn emit_git_variables() -> anyhow::Result<()> {
+    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
+    // in the corresponding GitHub workflow (publish_docker.yml).
+    // This is due to the Dockerfile building the binary outside of the git directory.
+    let mut builder = vergen_git2::Git2Builder::default();
+
+    builder.branch(true);
+    builder.commit_timestamp(true);
+    builder.commit_message(true);
+    builder.describe(true, true, None);
+    builder.sha(false);
+
+    let git2 = builder.build()?;
+
+    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
+}
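Downstream, these values are read back at compile time. A minimal sketch of the consuming side (assuming the variable names emitted above; when the build runs outside a git checkout, the emitter errors, only the cargo warning above is printed, and the variables stay unset):

    // Sketch: compile-time consumption of the emitted variables.
    // option_env! yields None when a variable was never set, which is exactly
    // what happens when emit_git_variables() fails in a non-git build.
    const BRANCH: Option<&str> = option_env!("VERGEN_GIT_BRANCH");
    const DESCRIBE: Option<&str> = option_env!("VERGEN_GIT_DESCRIBE");

build-info/src/lib.rs below does exactly this.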
203 build-info/src/lib.rs Normal file
@@ -0,0 +1,203 @@
+use time::format_description::well_known::Iso8601;
+
+#[derive(Debug, Clone)]
+pub struct BuildInfo {
+    pub branch: Option<&'static str>,
+    pub describe: Option<DescribeResult>,
+    pub commit_sha1: Option<&'static str>,
+    pub commit_msg: Option<&'static str>,
+    pub commit_timestamp: Option<time::OffsetDateTime>,
+}
+
+impl BuildInfo {
+    pub fn from_build() -> Self {
+        let branch: Option<&'static str> = option_env!("VERGEN_GIT_BRANCH");
+        let describe = DescribeResult::from_build();
+        let commit_sha1 = option_env!("VERGEN_GIT_SHA");
+        let commit_msg = option_env!("VERGEN_GIT_COMMIT_MESSAGE");
+        let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP");
+
+        let commit_timestamp = commit_timestamp.and_then(|commit_timestamp| {
+            time::OffsetDateTime::parse(commit_timestamp, &Iso8601::DEFAULT).ok()
+        });
+
+        Self { branch, describe, commit_sha1, commit_msg, commit_timestamp }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum DescribeResult {
+    Prototype { name: &'static str },
+    Release { version: &'static str, major: u64, minor: u64, patch: u64 },
+    Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
+    NotATag { describe: &'static str },
+}
+
+impl DescribeResult {
+    pub fn new(describe: &'static str) -> Self {
+        if let Some(name) = prototype_name(describe) {
+            Self::Prototype { name }
+        } else if let Some(release) = release_version(describe) {
+            release
+        } else if let Some(prerelease) = prerelease_version(describe) {
+            prerelease
+        } else {
+            Self::NotATag { describe }
+        }
+    }
+
+    pub fn from_build() -> Option<Self> {
+        let describe: &'static str = option_env!("VERGEN_GIT_DESCRIBE")?;
+        Some(Self::new(describe))
+    }
+
+    pub fn as_tag(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { version, .. } => Some(version),
+            DescribeResult::Prerelease { version, .. } => Some(version),
+            DescribeResult::NotATag { describe: _ } => None,
+        }
+    }
+
+    pub fn as_prototype(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { .. }
+            | DescribeResult::Prerelease { .. }
+            | DescribeResult::NotATag { .. } => None,
+        }
+    }
+}
+
+/// Parses the input as a prototype name.
+///
+/// Returns `Some(prototype_name)` if the following conditions are met on this value:
+///
+/// 1. starts with `prototype-`,
+/// 2. ends with `-<some_number>`,
+/// 3. does not end with `<some_number>-<some_number>`.
+///
+/// Otherwise, returns `None`.
+fn prototype_name(describe: &'static str) -> Option<&'static str> {
+    if !describe.starts_with("prototype-") {
+        return None;
+    }
+
+    let mut rsplit_prototype = describe.rsplit('-');
+    // the last component MUST be a number
+    rsplit_prototype.next()?.parse::<u64>().ok()?;
+    // the component before the last SHALL NOT be a number
+    rsplit_prototype.next()?.parse::<u64>().err()?;
+
+    Some(describe)
+}
+
+fn release_version(describe: &'static str) -> Option<DescribeResult> {
+    if !describe.starts_with('v') {
+        return None;
+    }
+
+    // full release versions don't contain a `-`
+    if describe.contains('-') {
+        return None;
+    }
+
+    // full release versions parse as vX.Y.Z, with X, Y, Z numbers.
+    let mut dots = describe[1..].split('.');
+    let major: u64 = dots.next()?.parse().ok()?;
+    let minor: u64 = dots.next()?.parse().ok()?;
+    let patch: u64 = dots.next()?.parse().ok()?;
+
+    if dots.next().is_some() {
+        return None;
+    }
+
+    Some(DescribeResult::Release { version: describe, major, minor, patch })
+}
+
+fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
+    // a prerelease version has the shape vM.N.P-rc.C
+    let mut hyphen = describe.rsplit('-');
+    let prerelease = hyphen.next()?;
+    if !prerelease.starts_with("rc.") {
+        return None;
+    }
+
+    let rc: u64 = prerelease[3..].parse().ok()?;
+
+    let release = hyphen.next()?;
+
+    let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
+    else {
+        return None;
+    };
+
+    Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
+}
+
+#[cfg(test)]
+mod test {
+    use super::DescribeResult;
+
+    fn assert_not_a_tag(describe: &'static str) {
+        assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_proto(describe: &'static str) {
+        assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
+        assert_eq!(
+            DescribeResult::Release { version: describe, major, minor, patch },
+            DescribeResult::new(describe)
+        )
+    }
+
+    fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
+        assert_eq!(
+            DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
+            DescribeResult::new(describe)
+        )
+    }
+
+    #[test]
+    fn not_a_tag() {
+        assert_not_a_tag("whatever-fuzzy");
+        assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
+        assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");
+
+        // technically a tag, but neither a proto nor a version, so not parsed as a tag
+        assert_not_a_tag("whatever");
+
+        // dirty versions
+        assert_not_a_tag("v1.7.0-1-ggga-dirty");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");
+
+        // after a version
+        assert_not_a_tag("v1.7.0-1-ggga");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga");
+
+        // after a proto
+        assert_not_a_tag("protoype-tag-0-1-ggga");
+        assert_not_a_tag("protoype-tag-0-1-ggga-dirty");
+    }
+
+    #[test]
+    fn prototype() {
+        assert_proto("prototype-tag-0");
+        assert_proto("prototype-tag-10");
+        assert_proto("prototype-long-name-tag-10");
+    }
+
+    #[test]
+    fn release() {
+        assert_release("v1.7.2", 1, 7, 2);
+    }
+
+    #[test]
+    fn prerelease() {
+        assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
+    }
+}
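A quick usage sketch of the tag parser above, with hypothetical tag names (mirroring the unit tests):

    use build_info::DescribeResult;

    fn describe_examples() {
        // release tags are vX.Y.Z with numeric components only
        assert!(matches!(DescribeResult::new("v1.7.2"), DescribeResult::Release { major: 1, minor: 7, patch: 2, .. }));
        // release candidates append -rc.C
        assert!(matches!(DescribeResult::new("v1.7.2-rc.3"), DescribeResult::Prerelease { rc: 3, .. }));
        // prototypes look like prototype-<name>-<number> (hypothetical name here)
        assert!(matches!(DescribeResult::new("prototype-example-5"), DescribeResult::Prototype { .. }));
        // a describe output past a tag, e.g. with extra commits, is NotATag
        assert!(matches!(DescribeResult::new("v1.7.0-1-ggga"), DescribeResult::NotATag { .. }));
    }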
@@ -1,4 +1,3 @@
-use std::convert::TryInto;
 use std::str::FromStr;

 use time::OffsetDateTime;
@@ -13,6 +13,7 @@ license.workspace = true
 [dependencies]
 tempfile = "3.9.0"
 thiserror = "1.0.56"
+tracing = "0.1.40"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }

 [dev-dependencies]
@@ -1,5 +1,5 @@
 use std::fs::File as StdFile;
-use std::ops::{Deref, DerefMut};
+use std::io::Write;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
@@ -22,20 +22,6 @@ pub enum Error {

 pub type Result<T> = std::result::Result<T, Error>;

-impl Deref for File {
-    type Target = NamedTempFile;
-
-    fn deref(&self) -> &Self::Target {
-        &self.file
-    }
-}
-
-impl DerefMut for File {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.file
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct FileStore {
     path: PathBuf,
@@ -56,7 +42,7 @@ impl FileStore {
         let file = NamedTempFile::new_in(&self.path)?;
         let uuid = Uuid::new_v4();
         let path = self.path.join(uuid.to_string());
-        let update_file = File { file, path };
+        let update_file = File { file: Some(file), path };

         Ok((uuid, update_file))
     }
@@ -67,7 +53,7 @@ impl FileStore {
         let file = NamedTempFile::new_in(&self.path)?;
         let uuid = Uuid::from_u128(uuid);
         let path = self.path.join(uuid.to_string());
-        let update_file = File { file, path };
+        let update_file = File { file: Some(file), path };

         Ok((uuid, update_file))
     }
@@ -75,7 +61,13 @@ impl FileStore {
     /// Returns the file corresponding to the requested uuid.
     pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
         let path = self.get_update_path(uuid);
-        let file = StdFile::open(path)?;
+        let file = match StdFile::open(path) {
+            Ok(file) => file,
+            Err(e) => {
+                tracing::error!("Can't access update file {uuid}: {e}");
+                return Err(e.into());
+            }
+        };
         Ok(file)
     }
@@ -110,8 +102,12 @@ impl FileStore {

     pub fn delete(&self, uuid: Uuid) -> Result<()> {
         let path = self.path.join(uuid.to_string());
-        std::fs::remove_file(path)?;
-        Ok(())
+        if let Err(e) = std::fs::remove_file(path) {
+            tracing::error!("Can't delete file {uuid}: {e}");
+            Err(e.into())
+        } else {
+            Ok(())
+        }
     }

     /// List the Uuids of the files in the FileStore
@@ -136,16 +132,40 @@ impl FileStore {

 pub struct File {
     path: PathBuf,
-    file: NamedTempFile,
+    file: Option<NamedTempFile>,
 }

 impl File {
+    pub fn dry_file() -> Result<Self> {
+        Ok(Self { path: PathBuf::new(), file: None })
+    }
+
     pub fn persist(self) -> Result<()> {
-        self.file.persist(&self.path)?;
+        if let Some(file) = self.file {
+            file.persist(&self.path)?;
+        }
         Ok(())
     }
 }

+impl Write for File {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        if let Some(file) = self.file.as_mut() {
+            file.write(buf)
+        } else {
+            Ok(buf.len())
+        }
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        if let Some(file) = self.file.as_mut() {
+            file.flush()
+        } else {
+            Ok(())
+        }
+    }
+}
+
 #[cfg(test)]
 mod test {
     use std::io::Write;
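With `file` now an `Option<NamedTempFile>`, a dry `File` accepts writes and silently discards them, so callers can exercise the full write/persist path without touching disk. A minimal sketch of that behavior (assuming the `file-store` API shown above):

    use std::io::Write;

    fn dry_run_discards_bytes() {
        // a dry file has no backing NamedTempFile
        let mut file = file_store::File::dry_file().unwrap();
        // write() reports success and flush() is a no-op, but nothing hits disk
        file.write_all(b"hello").unwrap();
        file.flush().unwrap();
        // persist() is also a no-op when there is no inner file
        file.persist().unwrap();
    }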
@@ -23,6 +23,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
 puffin = { version = "0.16.0", features = ["serialization"] }
+rayon = "1.8.1"
 roaring = { version = "0.10.2", features = ["serde"] }
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = { version = "1.0.111", features = ["preserve_order"] }
@@ -142,22 +142,28 @@ pub(crate) enum IndexOperation {

 impl Batch {
     /// Return the task ids associated with this batch.
-    pub fn ids(&self) -> Vec<TaskId> {
+    pub fn ids(&self) -> RoaringBitmap {
         match self {
             Batch::TaskCancelation { task, .. }
             | Batch::Dump(task)
             | Batch::IndexCreation { task, .. }
-            | Batch::IndexUpdate { task, .. } => vec![task.uid],
+            | Batch::IndexUpdate { task, .. } => {
+                RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
+            }
             Batch::SnapshotCreation(tasks)
             | Batch::TaskDeletions(tasks)
-            | Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
+            | Batch::IndexDeletion { tasks, .. } => {
+                RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
+            }
             Batch::IndexOperation { op, .. } => match op {
                 IndexOperation::DocumentOperation { tasks, .. }
                 | IndexOperation::Settings { tasks, .. }
                 | IndexOperation::DocumentClear { tasks, .. } => {
-                    tasks.iter().map(|task| task.uid).collect()
+                    RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
                 }
-                IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
+                IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
+                    RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
+                }
                 IndexOperation::SettingsAndDocumentOperation {
                     document_import_tasks: tasks,
                     settings_tasks: other,
@@ -167,9 +173,11 @@ impl Batch {
                     cleared_tasks: tasks,
                     settings_tasks: other,
                     ..
-                } => tasks.iter().chain(other).map(|task| task.uid).collect(),
+                } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
             },
-            Batch::IndexSwap { task } => vec![task.uid],
+            Batch::IndexSwap { task } => {
+                RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
+            }
         }
     }
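`RoaringBitmap::from_sorted_iter` requires strictly increasing input and returns a `Result`, which is why the single-task arms above can safely `unwrap()` on a one-element iterator, while the multi-task arms use `from_iter`, which accepts any order. A small standalone sketch of the distinction, using the `roaring` crate:

    use roaring::RoaringBitmap;

    fn bitmap_construction() {
        // sorted constructor: errors on out-of-order input, trivially fine for one element
        let single = RoaringBitmap::from_sorted_iter(std::iter::once(42u32)).unwrap();
        assert!(single.contains(42));

        // general constructor: input order does not matter
        let many = RoaringBitmap::from_iter([9u32, 3, 7]);
        assert_eq!(many.len(), 3);
    }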
@@ -953,7 +961,22 @@ impl IndexScheduler {
             .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

         let mut index_wtxn = index.write_txn()?;
-        let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
+
+        let mut tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
+
+        if index.is_corrupted(&index_wtxn)? {
+            tracing::error!("Aborting task due to corrupted index");
+            index_wtxn.abort();
+            for task in tasks.iter_mut() {
+                task.status = Status::Failed;
+                task.error = Some(Error::CorruptedIndex.into());
+            }
+
+            return Ok(tasks);
+        }
+
+        index.check_document_facet_consistency(&index_wtxn)?.check();
+
         index_wtxn.commit()?;

         // if the update processed successfully, we're going to store the new
@@ -1331,6 +1354,7 @@ impl IndexScheduler {
                 } else {
                     unreachable!()
                 };
+
                 let deleted_documents = delete_document_by_filter(
                     index_wtxn,
                     filter,
@@ -48,6 +48,8 @@ impl From<DateField> for Code {
 pub enum Error {
     #[error("{1}")]
     WithCustomErrorCode(Code, Box<Self>),
+    #[error("Received bad task id: {received} should be >= to {expected}.")]
+    BadTaskId { received: TaskId, expected: TaskId },
     #[error("Index `{0}` not found.")]
     IndexNotFound(String),
     #[error("Index `{0}` already exists.")]
@@ -136,6 +138,8 @@ pub enum Error {
     CreateBatch(Box<Self>),
     #[error("Corrupted task queue.")]
     CorruptedTaskQueue,
+    #[error("Corrupted index.")]
+    CorruptedIndex,
     #[error(transparent)]
     TaskDatabaseUpdate(Box<Self>),
     #[error(transparent)]
@@ -161,6 +165,7 @@ impl Error {
         match self {
             Error::IndexNotFound(_)
             | Error::WithCustomErrorCode(_, _)
+            | Error::BadTaskId { .. }
             | Error::IndexAlreadyExists(_)
             | Error::SwapDuplicateIndexFound(_)
             | Error::SwapDuplicateIndexesFound(_)
@@ -189,6 +194,7 @@ impl Error {
             | Error::Anyhow(_) => true,
             Error::CreateBatch(_)
             | Error::CorruptedTaskQueue
+            | Error::CorruptedIndex
             | Error::TaskDatabaseUpdate(_)
             | Error::HeedTransaction(_) => false,
             #[cfg(test)]
@@ -205,6 +211,7 @@ impl ErrorCode for Error {
     fn error_code(&self) -> Code {
         match self {
             Error::WithCustomErrorCode(code, _) => *code,
+            Error::BadTaskId { .. } => Code::BadRequest,
             Error::IndexNotFound(_) => Code::IndexNotFound,
             Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
             Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@@ -238,6 +245,7 @@ impl ErrorCode for Error {
             Error::CorruptedDump => Code::Internal,
             Error::TaskDatabaseUpdate(_) => Code::Internal,
             Error::CreateBatch(_) => Code::Internal,
+            Error::CorruptedIndex => Code::Internal,

             // This one should never be seen by the end user
             Error::AbortedTask => Code::Internal,
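The new `BadTaskId` variant maps to `Code::BadRequest`, so a caller that supplies an out-of-range custom task uid gets a client error rather than an internal one. A hedged sketch of how a registration path might use it (hypothetical guard, field types per the enum above):

    fn check_task_id(received: TaskId, expected: TaskId) -> Result<()> {
        // the error message promises `received` >= `expected`
        if received < expected {
            return Err(Error::BadTaskId { received, expected });
        }
        Ok(())
    }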
@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {

     let IndexScheduler {
         autobatching_enabled,
+        cleanup_enabled: _,
         must_stop_processing: _,
         processing_tasks,
         file_store,
File diff suppressed because it is too large
@@ -0,0 +1,90 @@
+---
+source: index-scheduler/src/lib.rs
+---
+[
+  {
+    "uid": 0,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "succeeded",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 1,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": {
+      "message": "Index `doggo` already exists.",
+      "code": "index_already_exists",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_already_exists"
+    },
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "failed",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 2,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "enqueued",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 3,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "enqueued",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  }
+]
@@ -0,0 +1,90 @@
+---
+source: index-scheduler/src/lib.rs
+---
+[
+  {
+    "uid": 0,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "succeeded",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 1,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": {
+      "message": "Index `doggo` already exists.",
+      "code": "index_already_exists",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_already_exists"
+    },
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "failed",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 2,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "enqueued",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  },
+  {
+    "uid": 3,
+    "enqueuedAt": "[date]",
+    "startedAt": "[date]",
+    "finishedAt": "[date]",
+    "error": null,
+    "canceledBy": null,
+    "details": {
+      "IndexInfo": {
+        "primary_key": null
+      }
+    },
+    "status": "enqueued",
+    "kind": {
+      "indexCreation": {
+        "index_uid": "doggo",
+        "primary_key": null
+      }
+    }
+  }
+]
@@ -1,5 +1,4 @@
 use std::borrow::Cow;
-use std::convert::TryInto;

 use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
 use uuid::Uuid;
@@ -1,7 +1,6 @@
 use std::borrow::Cow;
 use std::cmp::Reverse;
 use std::collections::HashSet;
-use std::convert::{TryFrom, TryInto};
 use std::fs::create_dir_all;
 use std::path::Path;
 use std::result::Result as StdResult;
@@ -1,6 +1,6 @@
 use std::fmt::{self, Debug, Display};
 use std::fs::File;
-use std::io::{self, Seek, Write};
+use std::io::{self, BufWriter, Write};
 use std::marker::PhantomData;

 use memmap2::MmapOptions;
@@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError {
 }

 /// Reads CSV from input and write an obkv batch to writer.
-pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };
     let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
     builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
@@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result
     Ok(count as u64)
 }

 /// Reads JSON from temporary file and write an obkv batch to writer.
-pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };
     let mut deserializer = serde_json::Deserializer::from_slice(&mmap);

@@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
 }

 /// Reads JSON from temporary file and write an obkv batch to writer.
-pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };

     for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {
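Dropping the `Seek` bound and inserting a `BufWriter` means the batch builder streams through an in-memory buffer, replacing many small writes with fewer large ones, and no longer needs a seekable target. A standalone sketch of the pattern (a hypothetical helper, not the actual `DocumentsBatchBuilder`):

    use std::io::{BufWriter, Write};

    // any writer now works; no Seek bound required
    fn write_batch(writer: impl Write) -> std::io::Result<()> {
        let mut buffered = BufWriter::new(writer);
        for record in [b"doc1", b"doc2"] {
            // buffered: coalesced into large flushes instead of per-record writes
            buffered.write_all(record)?;
        }
        buffered.flush()?;
        Ok(())
    }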
@@ -107,6 +107,7 @@ tracing = "0.1.40"
 tracing-subscriber = { version = "0.3.18", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 tracing-actix-web = "0.7.9"
+build-info = { version = "1.7.0", path = "../build-info" }

 [dev-dependencies]
 actix-rt = "2.9.0"
@@ -131,7 +132,6 @@ reqwest = { version = "0.11.23", features = [
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.3", optional = true }
 tempfile = { version = "3.9.0", optional = true }
-vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.6", optional = true }

 [features]
@@ -1,17 +1,4 @@
-use vergen::{vergen, Config, SemverKind};
-
 fn main() {
-    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
-    // in the corresponding GitHub workflow (publish_docker.yml).
-    // This is due to the Dockerfile building the binary outside of the git directory.
-    let mut config = Config::default();
-    // allow using non-annotated tags
-    *config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
-
-    if let Err(e) = vergen(config) {
-        println!("cargo:warning=vergen: {}", e);
-    }
-
     #[cfg(feature = "mini-dashboard")]
     mini_dashboard::setup_mini_dashboard().expect("Could not load the mini-dashboard assets");
 }
@@ -253,9 +253,11 @@ struct Infos {
     env: String,
     experimental_enable_metrics: bool,
     experimental_logs_mode: LogMode,
+    experimental_replication_parameters: bool,
     experimental_enable_logs_route: bool,
     experimental_reduce_indexing_memory_usage: bool,
     experimental_max_number_of_batched_tasks: usize,
+    gpu_enabled: bool,
     db_path: bool,
     import_dump: bool,
     dump_dir: bool,
@@ -292,6 +294,7 @@ impl From<Opt> for Infos {
             db_path,
             experimental_enable_metrics,
             experimental_logs_mode,
+            experimental_replication_parameters,
             experimental_enable_logs_route,
             experimental_reduce_indexing_memory_usage,
             experimental_max_number_of_batched_tasks,
@@ -340,8 +343,10 @@ impl From<Opt> for Infos {
             env,
             experimental_enable_metrics,
             experimental_logs_mode,
+            experimental_replication_parameters,
             experimental_enable_logs_route,
             experimental_reduce_indexing_memory_usage,
+            gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),
             dump_dir: dump_dir != PathBuf::from("dumps/"),
@@ -468,7 +473,9 @@ impl Segment {
             create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
         {
             // Replace the version number with the prototype name if any.
-            let version = if let Some(prototype) = crate::prototype_name() {
+            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
+                .and_then(|describe| describe.as_prototype())
+            {
                 prototype
             } else {
                 env!("CARGO_PKG_VERSION")
@@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C }
 gen_seq! { SeqFromRequestFut4; A B C D }
 gen_seq! { SeqFromRequestFut5; A B C D E }
 gen_seq! { SeqFromRequestFut6; A B C D E F }
+gen_seq! { SeqFromRequestFut7; A B C D E F G }

 pin_project! {
     #[project = ExtractProj]
@@ -265,7 +265,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
         .name(String::from("register-snapshot-tasks"))
         .spawn(move || loop {
             thread::sleep(snapshot_delay);
-            if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
+            if let Err(e) =
+                index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
+            {
                 error!("Error while registering snapshot: {}", e);
             }
         })
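`register` gains two arguments here. Judging by the replication-parameters changes elsewhere in this diff (`BadTaskId`, `cleanup_enabled`), they appear to be an optional caller-chosen task uid and a dry-run flag; this call keeps the old behavior. A hedged sketch of the assumed call shape:

    // Assumption: register(kind, custom_task_id, dry_run).
    // None keeps auto-assigned uids; false performs a real (non-dry-run) registration.
    let _task = index_scheduler.register(KindWithContent::SnapshotCreation, None, false)?;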
@@ -300,6 +302,7 @@ fn open_or_create_database_unchecked(
         enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
         indexer_config: (&opt.indexer_options).try_into()?,
         autobatching_enabled: true,
+        cleanup_enabled: !opt.experimental_replication_parameters,
         max_number_of_tasks: 1_000_000,
         max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
         index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
@@ -423,6 +426,9 @@ fn import_dump(
             let reader = BufReader::new(file);
             let reader = DocumentsBatchReader::from_reader(reader)?;

+            let embedder_configs = index.embedding_configs(&wtxn)?;
+            let embedders = index_scheduler.embedders(embedder_configs)?;
+
             let builder = milli::update::IndexDocuments::new(
                 &mut wtxn,
                 &index,
@@ -435,6 +441,8 @@ fn import_dump(
                 || false,
             )?;

+            let builder = builder.with_embedders(embedders);
+
             let (builder, user_result) = builder.add_documents(reader)?;
             let user_result = user_result?;
             tracing::info!(documents_found = user_result, "{} documents found.", user_result);
@@ -468,6 +476,7 @@ pub fn configure_data(
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(web::Data::new(logs_route))
|
||||
.app_data(web::Data::new(logs_stderr))
|
||||
.app_data(web::Data::new(opt.clone()))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
@@ -527,30 +536,3 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
||||
|
||||
/// Parses the output of
|
||||
/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
|
||||
/// as a prototype name.
|
||||
///
|
||||
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
|
||||
///
|
||||
/// 1. starts with `prototype-`,
|
||||
/// 2. ends with `-<some_number>`,
|
||||
/// 3. does not end with `<some_number>-<some_number>`.
|
||||
///
|
||||
/// Otherwise, returns `None`.
|
||||
pub fn prototype_name() -> Option<&'static str> {
|
||||
let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
|
||||
|
||||
if !prototype.starts_with("prototype-") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut rsplit_prototype = prototype.rsplit('-');
|
||||
// last component MUST be a number
|
||||
rsplit_prototype.next()?.parse::<u64>().ok()?;
|
||||
// the second-to-last component SHALL NOT be a number
rsplit_prototype.next()?.parse::<u64>().err()?;

Some(prototype)
}
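
A quick illustration of the three rules documented above; `looks_like_prototype` is a hypothetical stand-in that mirrors the rsplit logic for illustration, not the real function, which reads the build-time environment:

```rust
// Hypothetical helper mirroring the documented rules, for illustration only.
fn looks_like_prototype(name: &str) -> bool {
    if !name.starts_with("prototype-") {
        return false;
    }
    let mut parts = name.rsplit('-');
    // the last component must be a number...
    let last_is_number = parts.next().map_or(false, |s| s.parse::<u64>().is_ok());
    // ...and the second-to-last component must not be a number
    let second_to_last_is_number = parts.next().map_or(false, |s| s.parse::<u64>().is_ok());
    last_is_number && !second_to_last_is_number
}

fn main() {
    assert!(looks_like_prototype("prototype-my-feature-0"));
    assert!(!looks_like_prototype("prototype-my-feature"));     // no trailing number
    assert!(!looks_like_prototype("prototype-my-feature-0-0")); // <number>-<number> suffix
    assert!(!looks_like_prototype("my-feature-0"));             // missing prefix
}
```
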
@@ -12,8 +12,8 @@ use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::option::LogMode;
use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
@@ -74,6 +74,9 @@ fn on_panic(info: &std::panic::PanicInfo) {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;

std::env::var("MEILI_LOUIS_PUSHOVER_USER").expect("MEILI_LOUIS_PUSHOVER_USER not set");
std::env::var("MEILI_LOUIS_PUSHOVER_APP").expect("MEILI_LOUIS_PUSHOVER_APP not set");

std::panic::set_hook(Box::new(on_panic));

anyhow::ensure!(
@@ -163,8 +166,8 @@ pub fn print_launch_resume(
analytics: Arc<dyn Analytics>,
config_read_from: Option<PathBuf>,
) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let build_info = build_info::BuildInfo::from_build();

let protocol =
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
let ascii_name = r#"
@@ -189,10 +192,18 @@ pub fn print_launch_resume(
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
eprintln!(
"Commit date:\t\t{:?}",
build_info
.commit_timestamp
.and_then(|commit_timestamp| commit_timestamp
.format(&time::format_description::well_known::Rfc3339)
.ok())
.unwrap_or("unknown".into())
);
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
if let Some(prototype) = prototype_name() {
if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
eprintln!("Prototype:\t\t{:?}", prototype);
}

@@ -1,4 +1,3 @@
use std::convert::TryFrom;
use std::env::VarError;
use std::ffi::OsStr;
use std::fmt::Display;
@@ -52,6 +51,7 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
@@ -358,6 +358,16 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_logs_route: bool,

/// Enables multiple features that help you run Meilisearch in a replicated context.
/// For more information, see: <https://github.com/orgs/meilisearch/discussions/725>
///
/// - /!\ Disables the automatic cleanup of old processed tasks; you're in charge of that now
/// - Lets you specify a custom task ID upon registering a task
/// - Lets you dry-register a task (you get an answer from the route, but nothing is actually registered in Meilisearch and it won't be processed)
#[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)]
#[serde(default)]
pub experimental_replication_parameters: bool,

/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
@@ -465,6 +475,7 @@ impl Opt {
experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_replication_parameters,
experimental_reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@@ -525,6 +536,10 @@ impl Opt {
MEILI_EXPERIMENTAL_LOGS_MODE,
experimental_logs_mode.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS,
experimental_replication_parameters.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),

@@ -10,7 +10,7 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::keys::{CreateApiKey, Key, PatchApiKey};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

@@ -11,7 +11,8 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -21,6 +22,7 @@ pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
@@ -29,8 +31,12 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Create dump");
Ok(HttpResponse::Accepted().json(task))

@@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use index_scheduler::{IndexScheduler, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -36,8 +36,11 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::parse_filter;
use crate::Opt;

static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@@ -119,6 +122,7 @@ pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
@@ -130,9 +134,13 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete document");
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

@@ -267,6 +275,7 @@ pub async fn replace_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -277,6 +286,8 @@ pub async fn replace_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -285,6 +296,8 @@ pub async fn replace_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
@@ -299,6 +312,7 @@ pub async fn update_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -309,6 +323,8 @@ pub async fn update_documents(
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -317,6 +333,8 @@ pub async fn update_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
@@ -334,6 +352,8 @@ async fn document_addition(
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
dry_run: bool,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
@@ -366,7 +386,7 @@ async fn document_addition(
}
};

let (uuid, mut update_file) = index_scheduler.create_update_file()?;
let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;

let temp_file = match tempfile() {
Ok(file) => file,
@@ -405,11 +425,9 @@ async fn document_addition(
let read_file = buffer.into_inner().into_std().await;
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.
update_file.persist()?;
@@ -450,7 +468,9 @@ async fn document_addition(
};

let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
.await?
{
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@@ -466,6 +486,7 @@ pub async fn delete_documents_batch(
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
@@ -480,8 +501,12 @@ pub async fn delete_documents_batch(

let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task))
@@ -499,6 +524,7 @@ pub async fn delete_documents_by_filter(
index_uid: web::Path<String>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
@@ -516,8 +542,12 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };

let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
@@ -527,14 +557,19 @@ pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);

let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task))

@@ -17,11 +17,13 @@ use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;

use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::is_dry_run;
use crate::Opt;

pub mod documents;
pub mod facet_search;
@@ -123,6 +125,7 @@ pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
@@ -137,8 +140,12 @@ pub async fn create_index(
);

let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create index");

Ok(HttpResponse::Accepted().json(task))
@@ -190,6 +197,7 @@ pub async fn update_index(
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
@@ -206,8 +214,12 @@ pub async fn update_index(
primary_key: body.primary_key,
};

let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Update index");
Ok(HttpResponse::Accepted().json(task))
@@ -216,11 +228,17 @@ pub async fn update_index(
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete index");

Ok(HttpResponse::Accepted().json(task))

@@ -15,7 +15,8 @@ use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

#[macro_export]
macro_rules! make_setting_route {
@@ -34,7 +35,8 @@ macro_rules! make_setting_route {
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
use $crate::Opt;
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};

pub async fn delete(
index_scheduler: GuardedData<
@@ -42,6 +44,8 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -56,8 +60,10 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

@@ -73,6 +79,7 @@ macro_rules! make_setting_route {
index_uid: actix_web::web::Path<String>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
opt: web::Data<Opt>,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -105,8 +112,10 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

@@ -652,6 +661,7 @@ pub async fn update_all(
index_uid: web::Path<String>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -767,8 +777,12 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Update all settings");
Ok(HttpResponse::Accepted().json(task))
@@ -790,6 +804,8 @@ pub async fn get_all(
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -803,8 +819,12 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Delete all settings");
Ok(HttpResponse::Accepted().json(task))

@@ -4,7 +4,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
@@ -15,6 +15,7 @@ use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::Opt;

const PAGINATION_DEFAULT_LIMIT: usize = 20;

@@ -45,6 +46,56 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/experimental-features").configure(features::configure));
}

pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(None);
}
let task_id = req
.headers()
.get("TaskId")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("TaskId is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map(|s| {
s.parse::<TaskId>().map_err(|e| {
ResponseError::from_msg(
format!(
"Could not parse the TaskId as a {}: {e}",
std::any::type_name::<TaskId>(),
),
Code::BadRequest,
)
})
})
.transpose()?;
Ok(task_id)
}

pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(false);
}
Ok(req
.headers()
.get("DryRun")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("DryRun is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map_or(false, |s| s.to_lowercase() == "true"))
}
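
Both helpers read plain request headers and are no-ops unless `--experimental-replication-parameters` is set. A minimal sketch of a request carrying them, using the same actix test utilities as the integration tests further down (app wiring elided):

```rust
use actix_web::test;

fn main() {
    let req = test::TestRequest::post()
        .uri("/indexes")
        .insert_header(("TaskId", "42"))
        .insert_header(("DryRun", "true"))
        .to_request();
    // With the replication flag enabled, `get_task_id` would yield Some(42)
    // and `is_dry_run` would yield true for this request; without the flag,
    // both headers are ignored entirely.
    assert!(req.headers().contains_key("TaskId"));
}
```
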
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@@ -308,12 +359,18 @@ async fn get_version(
) -> HttpResponse {
analytics.publish("Version Seen".to_string(), json!(null), Some(&req));

let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let build_info = build_info::BuildInfo::from_build();

HttpResponse::Ok().json(VersionResponse {
commit_sha: commit_sha.to_string(),
commit_date: commit_date.to_string(),
commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
commit_date: build_info
.commit_timestamp
.and_then(|commit_timestamp| {
commit_timestamp
.format(&time::format_description::well_known::Iso8601::DEFAULT)
.ok()
})
.unwrap_or("unknown".into()),
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
})
}
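
For reference, the two well-known formatters used in this diff come straight from the `time` crate; a minimal sketch (assuming the crate's `std` and `formatting` features are enabled):

```rust
use time::format_description::well_known::{Iso8601, Rfc3339};
use time::OffsetDateTime;

fn main() {
    let now = OffsetDateTime::now_utc();
    // The launch banner formats the commit timestamp as RFC 3339,
    // while the /version route uses the default ISO 8601 profile.
    println!("{}", now.format(&Rfc3339).unwrap());
    println!("{}", now.format(&Iso8601::DEFAULT).unwrap());
}
```
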
@@ -10,7 +10,8 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
@@ -19,13 +20,18 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));

let task = KindWithContent::SnapshotCreation;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();

debug!(returns = ?task, "Create snapshot");
Ok(HttpResponse::Accepted().json(task))

@@ -10,12 +10,13 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;

use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;

pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
@@ -32,6 +33,7 @@ pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -60,7 +62,11 @@ pub async fn swap_indexes(
}

let task = KindWithContent::IndexSwap { swaps };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
Ok(HttpResponse::Accepted().json(task))
}

@@ -18,11 +18,12 @@ use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;

use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;

const DEFAULT_LIMIT: u32 = 20;

@@ -161,6 +162,7 @@ async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -197,7 +199,11 @@ async fn cancel_tasks(
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };

let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task =
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();

Ok(HttpResponse::Ok().json(task))
@@ -207,6 +213,7 @@ async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -242,7 +249,10 @@ async fn delete_tasks(
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };

let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();

Ok(HttpResponse::Ok().json(task))

@@ -100,16 +100,11 @@ impl Index<'_> {
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
headers: Vec<(&str, &str)>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);

if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
self.service.post_str(url, payload, headers).await
}

pub async fn update_documents(

@@ -1,10 +1,11 @@
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value};
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;

/// This is the basic usage of our API, and every other test uses the content-type application/json
@@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() {
assert_eq!(code, 200, "failed with `{}`", response);
assert_eq!(response["results"].as_array().unwrap().len(), 120);
}

#[actix_rt::test]
async fn dry_register_file() {
let temp = tempfile::tempdir().unwrap();

let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let index = server.index("tamo");

let documents = r#"
{
"id": "12",
"doggo": "kefir"
}
"#;

let (response, code) = index
.raw_add_documents(
documents,
vec![("Content-Type", "application/json"), ("DryRun", "true")],
"",
)
.await;
snapshot!(response, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
snapshot!(code, @"202 Accepted");

let (response, code) = index.get_task(response.uid()).await;
snapshot!(response, @r###"
{
"message": "Task `0` not found.",
"code": "task_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#task_not_found"
}
"###);
snapshot!(code, @"404 Not Found");
}

@@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() {
}
"###);

let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() {
}
"###);

let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index.raw_add_documents("", None, "").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() {
"###);

// even with a csv delimiter specified, this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() {
}
"###);

let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() {
"###);

let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.raw_add_documents(
"",
vec![("Content-Type", "application/json")],
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
}
"###);

let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{

@@ -1,4 +1,4 @@
use meili_snap::snapshot;
use meili_snap::{json_string, snapshot};

use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server};
@@ -209,3 +209,93 @@ async fn error_update_documents_missing_document_id() {
"https://docs.meilisearch.com/errors#missing_document_id"
);
}

#[actix_rt::test]
async fn update_faceted_document() {
let server = Server::new().await;
let index = server.index("test");

let (response, code) = index
.update_settings(json!({
"rankingRules": ["facet:asc"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(0).await;

let documents: Vec<_> = (0..1000)
.map(|id| {
json!({
"doc_id": id,
"facet": (id/3),
})
})
.collect();

let (_response, code) = index.add_documents(documents.into(), None).await;
assert_eq!(code, 202);

index.wait_task(1).await;

let documents = json!([
{
"doc_id": 9,
"facet": 1.5,
}
]);

let (response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202, "response: {}", response);

index.wait_task(2).await;

index
.search(json!({"limit": 10}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"doc_id": 0,
"facet": 0
},
{
"doc_id": 1,
"facet": 0
},
{
"doc_id": 2,
"facet": 0
},
{
"doc_id": 3,
"facet": 1
},
{
"doc_id": 4,
"facet": 1
},
{
"doc_id": 5,
"facet": 1
},
{
"doc_id": 9,
"facet": 1.5
},
{
"doc_id": 6,
"facet": 2
},
{
"doc_id": 7,
"facet": 2
},
{
"doc_id": 8,
"facet": 2
}
]
"###);
})
.await;
}

@@ -2,9 +2,10 @@ use actix_web::http::header::ContentType;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;

use crate::common::encoder::Encoder;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;

#[actix_rt::test]
@@ -199,3 +200,79 @@ async fn error_create_with_invalid_index_uid() {
}
"###);
}

#[actix_rt::test]
async fn send_task_id() {
let temp = tempfile::tempdir().unwrap();

let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();

let app = server.init_web_app().await;
let index = server.index("catto");
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "catto",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);

let body = serde_json::to_string(&json!({
"uid": "doggo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "25"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();

let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"202 Accepted");

let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 25,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);

let body = serde_json::to_string(&json!({
"uid": "girafo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "12"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();

let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"400 Bad Request");

let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response), @r###"
{
"message": "Received bad task id: 12 should be >= to 26.",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}

@@ -7,7 +7,7 @@ use std::sync::Arc;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::web::{Bytes, Data};
use actix_web::{post, App, HttpResponse, HttpServer};
use actix_web::{post, App, HttpRequest, HttpResponse, HttpServer};
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tokio::sync::mpsc;
@@ -17,7 +17,17 @@ use crate::common::{default_settings, Server};
use crate::json;

#[post("/")]
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
async fn forward_body(
req: HttpRequest,
sender: Data<mpsc::UnboundedSender<Vec<u8>>>,
body: Bytes,
) -> HttpResponse {
let headers = req.headers();
assert_eq!(headers.get("content-type").unwrap(), "application/x-ndjson");
assert_eq!(headers.get("transfer-encoding").unwrap(), "chunked");
assert_eq!(headers.get("accept-encoding").unwrap(), "gzip");
assert_eq!(headers.get("content-encoding").unwrap(), "gzip");

let body = body.to_vec();
sender.send(body).unwrap();
HttpResponse::Ok().into()

@@ -1,5 +1,4 @@
use std::borrow::Cow;
use std::convert::TryInto;

use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;

@@ -26,7 +26,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.5", default-features = false, features = [
grenad = { git = "https://github.com/meilisearch/grenad.git", branch = "keep-source-index-in-merger", version = "0.4.5", default-features = false, features = [
"rayon",
"tempfile",
] }
@@ -70,13 +70,13 @@ itertools = "0.11.0"
# profiling
puffin = "0.16.0"

# logging
logging_timer = "1.1.0"
csv = "1.3.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
"online",
] }

@@ -67,6 +67,8 @@ pub mod main_key {
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";

pub const CORRUPTED: &str = "corrupted";
}

pub mod db_name {
@@ -1507,6 +1509,103 @@ impl Index {
_ => "default".to_owned(),
})
}

pub fn check_document_facet_consistency(
&self,
rtxn: &RoTxn<'_>,
) -> Result<DocumentFacetConsistency> {
let documents = self.documents_ids(rtxn)?;

let field_ids_map = self.fields_ids_map(rtxn)?;

let mut facets = Vec::new();
let mut facet_exists = Vec::new();
let faceted_fields = self.user_defined_faceted_fields(rtxn)?;
for fid in field_ids_map.ids() {
let facet_name = field_ids_map.name(fid).unwrap();
if !faceted_fields.contains(facet_name) {
continue;
};
let mut facet = RoaringBitmap::new();

// the value doesn't matter here; we'll truncate to the level
let key = crate::heed_codec::facet::FacetGroupKey {
field_id: fid,
level: 0,
left_bound: &[] as _,
};

for res in self
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<crate::heed_codec::BytesRefCodec>>()
.prefix_iter(rtxn, &key)?
{
let (_k, v) = res?;
facet |= v.bitmap;
}

for res in self
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<crate::heed_codec::BytesRefCodec>>()
.prefix_iter(rtxn, &key)?
{
let (_k, v) = res?;
facet |= v.bitmap;
}

facets.push((field_ids_map.name(fid).unwrap().to_owned(), facet));
facet_exists.push(self.exists_faceted_documents_ids(rtxn, fid)?);
}

Ok(DocumentFacetConsistency { documents, facets, facet_exists })
}

pub fn mark_as_corrupted(&self, wtxn: &mut RwTxn<'_>) -> Result<()> {
Ok(self.main.remap_types::<Str, Str>().put(wtxn, main_key::CORRUPTED, "corrupted")?)
}

pub fn is_corrupted(&self, txn: &RoTxn<'_>) -> Result<bool> {
Ok(self.main.remap_types::<Str, Str>().get(txn, main_key::CORRUPTED)?.is_some())
}
}

pub struct DocumentFacetConsistency {
documents: RoaringBitmap,
facets: Vec<(String, RoaringBitmap)>,
facet_exists: Vec<RoaringBitmap>,
}

impl DocumentFacetConsistency {
pub fn check(&self) {
let mut inconsistencies = 0;
for ((field_name, facet), _facet_exists) in self.facets.iter().zip(self.facet_exists.iter())
{
if field_name == "_geo" {
continue;
}

// only check the internal ids missing in documents, as that is the more serious condition
// let documents = self.documents.clone() & facet_exists;
let documents = self.documents.clone();
// let missing_in_facets = &documents - facet;
let missing_in_documents = facet - documents;

/*for id in missing_in_facets {
tracing::error!(id, field_name, "Missing in facets");
inconsistencies += 1;
}*/
for id in missing_in_documents {
tracing::error!(id, field_name, "Missing in documents");
inconsistencies += 1;
}
}
if inconsistencies > 0 {
panic!(
"Panicked due to the previous {} inconsistencies between documents and facets",
inconsistencies
)
}
}
}

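The heart of the check above is plain set subtraction on roaring bitmaps; a minimal sketch with made-up document ids:

```rust
use roaring::RoaringBitmap;

fn main() {
    let documents = RoaringBitmap::from_iter([1u32, 2, 3]);
    let facet = RoaringBitmap::from_iter([2u32, 3, 4]);
    // Ids present in the facet databases but absent from the documents
    // bitmap are exactly the inconsistencies the check logs and counts.
    let missing_in_documents = &facet - &documents;
    assert!(missing_in_documents.contains(4));
    assert_eq!(missing_in_documents.len(), 1);
}
```
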
#[cfg(test)]
|
||||
|
||||
@@ -15,7 +15,7 @@ pub struct BucketSortOutput {

// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search::bucket_sort")]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,

@@ -191,7 +191,7 @@ fn resolve_maximally_reduced_query_graph(
    Ok(docids)
}

#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
@@ -557,7 +557,7 @@ pub fn execute_vector_search(
}

#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn execute_search(
    ctx: &mut SearchContext,
    query: Option<&str>,
@@ -577,6 +577,9 @@ pub fn execute_search(

    let mut located_query_terms = None;
    let query_terms = if let Some(query) = query {
        let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
        let entered = span.enter();

        // We make sure that the analyzer is aware of the stop words
        // this ensures that the query builder is able to properly remove them.
        let mut tokbuilder = TokenizerBuilder::new();
@@ -605,7 +608,12 @@ pub fn execute_search(
        }

        let tokenizer = tokbuilder.build();
        drop(entered);

        let span = tracing::trace_span!(target: "search::tokens", "tokenize");
        let entered = span.enter();
        let tokens = tokenizer.tokenize(query);
        drop(entered);

        let query_terms = located_query_terms_from_tokens(ctx, tokens, words_limit)?;
        if query_terms.is_empty() {
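
These `target = "search::…"` values are ordinary `tracing` targets, so the new spans can be enabled selectively. A minimal sketch, assuming the `tracing-subscriber` crate with its `env-filter` feature (not shown in this diff):

use tracing_subscriber::EnvFilter;

fn main() {
    // Emit only the bucket-sort and tokenizer spans introduced above, at trace level.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::new("search::bucket_sort=trace,search::tokens=trace"))
        .init();
}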
@@ -6,9 +6,10 @@ use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore;

use super::*;
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::new::query_term::TwoTypoTerm;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{Lazy, TwoTypoTerm};
use crate::search::new::{limits, SearchContext};
use crate::search::{build_dfa, get_first};
use crate::{Result, MAX_WORD_LENGTH};

@@ -7,7 +7,6 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::RangeInclusive;

use compute_derivations::partially_initialized_term_from_word;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_tokens, make_ngram, number_of_typos_allowed};

@@ -1,11 +1,15 @@
use std::collections::BTreeSet;

use charabia::normalizer::NormalizedTokenIter;
use charabia::{SeparatorKind, TokenKind};

use super::*;
use super::compute_derivations::partially_initialized_term_from_word;
use super::{LocatedQueryTerm, ZeroTypoTerm};
use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
use crate::{Result, SearchContext, MAX_WORD_LENGTH};

/// Convert the tokenised search query into a list of located query terms.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
pub fn located_query_terms_from_tokens(
    ctx: &mut SearchContext,
    query: NormalizedTokenIter,
@@ -225,7 +229,7 @@ pub fn make_ngram(
}

struct PhraseBuilder {
    words: Vec<Option<Interned<String>>>,
    words: Vec<Option<crate::search::new::Interned<String>>>,
    start: u16,
    end: u16,
}

@@ -18,15 +18,39 @@ use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, Index, Result};

enum InsertionResult {
/// Enum used as a return value for the facet incremental indexing.
///
/// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node.
///
/// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented.
///
/// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
///
/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact on the `level`.
/// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
/// For example, when removing `2` from a document containing `2` and `3`, the document id will be removed from `level 0` but should remain in the group node [1..4] in `level 1`.
enum ModificationResult {
    InPlace,
    Expand,
    Insert,
}
enum DeletionResult {
    InPlace,
    Reduce { next: Option<Vec<u8>> },
    Remove { next: Option<Vec<u8>> },
    Nothing,
}

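
A minimal sketch (not from the diff) of how a level can react to the result reported by the level below it; the comments paraphrase the documentation above:

match result {
    ModificationResult::InPlace => { /* parent key and group size are untouched */ }
    ModificationResult::Insert => { /* a key appeared below: parent group size += 1 */ }
    ModificationResult::Expand => { /* the child's left bound grew: the parent bound may follow */ }
    ModificationResult::Reduce { .. } => { /* the child's left bound moved to the `next` facet value */ }
    ModificationResult::Remove { .. } => { /* a key vanished: parent size -= 1, bound may move to `next` */ }
    ModificationResult::Nothing => { /* nothing changed below, stop recursing */ }
}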
/// Algorithm to incrementally insert and delete elements into the
@@ -65,8 +89,9 @@ impl FacetsUpdateIncremental {

    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
    pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
        let mut current_field_id = None;
        let mut facet_level_may_be_updated = false;
        let mut iter = self.delta_data.into_stream_merger_iter()?;

        while let Some((key, value)) = iter.next()? {
            if !valid_lmdb_key(key) {
                continue;
@@ -74,25 +99,47 @@ impl FacetsUpdateIncremental {

            let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
                .map_err(heed::Error::Encoding)?;

            if facet_level_may_be_updated
                && current_field_id.map_or(false, |fid| fid != key.field_id)
            {
                // Only add or remove a level after making all the field modifications.
                self.inner.add_or_delete_level(wtxn, current_field_id.unwrap())?;
                facet_level_may_be_updated = false;
            }
            current_field_id = Some(key.field_id);

            let value = KvReader::new(value);
            let docids_to_delete = value
                .get(DelAdd::Deletion)
                .map(CboRoaringBitmapCodec::bytes_decode)
                .map(|o| o.map_err(heed::Error::Encoding));
                .map(|o| o.map_err(heed::Error::Encoding))
                .transpose()?;

            let docids_to_add = value
                .get(DelAdd::Addition)
                .map(CboRoaringBitmapCodec::bytes_decode)
                .map(|o| o.map_err(heed::Error::Encoding));
                .map(|o| o.map_err(heed::Error::Encoding))
                .transpose()?;

            if let Some(docids_to_delete) = docids_to_delete {
                let docids_to_delete = docids_to_delete?;
                self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
            let level_size_changed = self.inner.modify(
                wtxn,
                key.field_id,
                key.left_bound,
                docids_to_add.as_ref(),
                docids_to_delete.as_ref(),
            )?;

            if level_size_changed {
                // if a node has been added or removed from the highest level,
                // we may have to update the facet level.
                facet_level_may_be_updated = true;
            }
        }

        if let Some(docids_to_add) = docids_to_add {
            let docids_to_add = docids_to_add?;
            self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
        if let Some(field_id) = current_field_id {
            if facet_level_may_be_updated {
                self.inner.add_or_delete_level(wtxn, field_id)?;
            }
        }

@@ -166,138 +213,78 @@ impl FacetsUpdateIncrementalInner {
    ///
    /// ## Return
    /// See documentation of `insert_in_level`
    fn insert_in_level_0(
    fn modify_in_level_0(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<InsertionResult> {
        add_docids: Option<&RoaringBitmap>,
        del_docids: Option<&RoaringBitmap>,
    ) -> Result<ModificationResult> {
        let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
        let value = FacetGroupValue { bitmap: docids.clone(), size: 1 };

        let mut level0_prefix = vec![];
        level0_prefix.extend_from_slice(&field_id.to_be_bytes());
        level0_prefix.push(0);

        let mut iter =
            self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;

        if iter.next().is_none() {
            drop(iter);
            self.db.put(txn, &key, &value)?;
            Ok(InsertionResult::Insert)
        } else {
            drop(iter);
            let old_value = self.db.get(txn, &key)?;
            match old_value {
                Some(mut updated_value) => {
                    // now merge the two
                    updated_value.bitmap |= value.bitmap;
                    self.db.put(txn, &key, &updated_value)?;
                    Ok(InsertionResult::InPlace)
                }
                None => {
        let old_value = self.db.get(txn, &key)?;
        match (old_value, add_docids, del_docids) {
            // Addition + deletion on an existing value
            (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), Some(del_docids)) => {
                let value = FacetGroupValue { bitmap: (bitmap - del_docids) | add_docids, size: 1 };
                self.db.put(txn, &key, &value)?;
                Ok(ModificationResult::InPlace)
            }
            // Addition on an existing value
            (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), None) => {
                let value = FacetGroupValue { bitmap: bitmap | add_docids, size: 1 };
                self.db.put(txn, &key, &value)?;
                Ok(ModificationResult::InPlace)
            }
            // Addition of a new value (ignore deletion)
            (None, Some(add_docids), _) => {
                let value = FacetGroupValue { bitmap: add_docids.clone(), size: 1 };
                self.db.put(txn, &key, &value)?;
                Ok(ModificationResult::Insert)
            }
            // Deletion on an existing value, fully delete the key if the resulting value is empty.
            (Some(FacetGroupValue { mut bitmap, .. }), None, Some(del_docids)) => {
                bitmap -= del_docids;
                if bitmap.is_empty() {
                    // Full deletion
                    let mut next_key = None;
                    if let Some((next, _)) =
                        self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
                    {
                        if next.field_id == field_id && next.level == 0 {
                            next_key = Some(next.left_bound.to_vec());
                        }
                    }
                    self.db.delete(txn, &key)?;
                    Ok(ModificationResult::Remove { next: next_key })
                } else {
                    // Partial deletion
                    let value = FacetGroupValue { bitmap, size: 1 };
                    self.db.put(txn, &key, &value)?;
                    Ok(InsertionResult::Insert)
                    Ok(ModificationResult::InPlace)
                }
            }
            // Otherwise do nothing (None + no addition + deletion == Some + no addition + no deletion == Nothing),
            // may be unreachable at some point.
            (None, None, _) | (Some(_), None, None) => Ok(ModificationResult::Nothing),
        }
    }

    /// Insert the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
    /// This function works recursively.
    /// Split a level node into two balanced nodes.
    ///
    /// ## Return
    /// Returns the effect of adding the facet value to the database on the given `level`.
    ///
    /// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have
    /// an effect on the number of keys in that level. Therefore, it did not increase the number of children
    /// of the parent node.
    ///
    /// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted
    /// in the addition of a new key in that level, and that therefore the number of children
    /// of the parent node should be incremented.
    fn insert_in_level(
    /// # Return
    /// Returns `ModificationResult::Insert` if the split is successful.
    fn split_group(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        level: u8,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<InsertionResult> {
        if level == 0 {
            return self.insert_in_level_0(txn, field_id, facet_value, docids);
        }

        let max_group_size = self.max_group_size;

        let result = self.insert_in_level(txn, field_id, level - 1, facet_value, docids)?;
        // level below inserted an element

        let (insertion_key, insertion_value) =
            self.find_insertion_key_value(field_id, level, facet_value, txn)?;

        match result {
            // because we know that we inserted in place, the facet_value is not a new one
            // thus it doesn't extend a group, and thus the insertion key computed above is
            // still correct
            InsertionResult::InPlace => {
                let mut updated_value = insertion_value;
                updated_value.bitmap |= docids;
                self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;

                return Ok(InsertionResult::InPlace);
            }
            InsertionResult::Expand => {}
            InsertionResult::Insert => {}
        }

        // Here we know that inserting the facet value in the level below resulted in the creation
        // of a new key. Therefore, it may be the case that we need to modify the left bound of the
        // insertion key (see documentation of `find_insertion_key_value` for an example of when that
        // could happen).
        let (insertion_key, insertion_key_was_modified) = {
            let mut new_insertion_key = insertion_key.clone();
            let mut key_should_be_modified = false;

            if facet_value < insertion_key.left_bound.as_slice() {
                new_insertion_key.left_bound = facet_value.to_vec();
                key_should_be_modified = true;
            }
            if key_should_be_modified {
                let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
                assert!(is_deleted);
                self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
            }
            (new_insertion_key, key_should_be_modified)
        };
        // Now we know that the insertion key contains the `facet_value`.

        // We still need to update the insertion value by:
        // 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`)
        // 2. Merge the previous docids with the new one
        let mut updated_value = insertion_value;

        if matches!(result, InsertionResult::Insert) {
            updated_value.size += 1;
        }

        if updated_value.size < max_group_size {
            updated_value.bitmap |= docids;
            self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
            if insertion_key_was_modified {
                return Ok(InsertionResult::Expand);
            } else {
                return Ok(InsertionResult::InPlace);
            }
        }

        // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
        // Therefore it must be split into two nodes.

        let size_left = updated_value.size / 2;
        let size_right = updated_value.size - size_left;
        insertion_key: FacetGroupKey<Vec<u8>>,
        insertion_value: FacetGroupValue,
    ) -> Result<ModificationResult> {
        let size_left = insertion_value.size / 2;
        let size_right = insertion_value.size - size_left;

        let level_below = level - 1;

@@ -351,34 +338,228 @@ impl FacetsUpdateIncrementalInner {
        self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?;
        self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?;

        Ok(InsertionResult::Insert)
        Ok(ModificationResult::Insert)
    }

    /// Insert the given facet value and corresponding document ids in the database.
    pub fn insert(
    /// Remove the docids still present in the related sub-level nodes from the del_docids.
    ///
    /// This process is needed to avoid removing docids from a group node where the docid is present in several sub-nodes.
    fn trim_del_docids<'a>(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        level: u8,
        insertion_key: &FacetGroupKey<Vec<u8>>,
        insertion_value_size: usize,
        del_docids: &'a RoaringBitmap,
    ) -> Result<std::borrow::Cow<'a, RoaringBitmap>> {
        let level_below = level - 1;

        let start_key = FacetGroupKey {
            field_id,
            level: level_below,
            left_bound: insertion_key.left_bound.as_slice(),
        };

        let mut del_docids = std::borrow::Cow::Borrowed(del_docids);
        let iter = self.db.range(txn, &(start_key..))?.take(insertion_value_size);
        for next in iter {
            let (_, value) = next?;
            // if a sub-level bitmap has common docids with del_docids,
            // then these docids shouldn't be removed, so remove them from the deletion list.
            if !value.bitmap.is_disjoint(&del_docids) {
                *del_docids.to_mut() -= value.bitmap;
            }
        }

        Ok(del_docids)
    }

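A worked example of the trimming with illustrative values (docid 7 is indexed under facet values 2 and 3, both covered by the same group node):

use roaring::RoaringBitmap;

let mut group: RoaringBitmap = [7u32].into_iter().collect();     // bitmap of the group node
let del: RoaringBitmap = [7u32].into_iter().collect();           // deletion issued for value 2
let still_present: RoaringBitmap = [7u32].into_iter().collect(); // sub-node for value 3 keeps 7
let trimmed = &del - &still_present;                             // what trimming leaves in the deletion list
group -= &trimmed;                                               // nothing is removed from the group
assert!(group.contains(7));
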
    /// Modify the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
    /// This function works recursively.
    ///
    /// ## Return
    /// Returns the effect of modifying the facet value in the database on the given `level`.
    ///
    fn modify_in_level(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        level: u8,
        facet_value: &[u8],
        add_docids: Option<&RoaringBitmap>,
        del_docids: Option<&RoaringBitmap>,
    ) -> Result<ModificationResult> {
        if level == 0 {
            return self.modify_in_level_0(txn, field_id, facet_value, add_docids, del_docids);
        }

        let result =
            self.modify_in_level(txn, field_id, level - 1, facet_value, add_docids, del_docids)?;
        // level below inserted an element

        if let ModificationResult::Nothing = result {
            // if the previous level has not been modified,
            // early return ModificationResult::Nothing.
            return Ok(ModificationResult::Nothing);
        }

        let (insertion_key, insertion_value) =
            self.find_insertion_key_value(field_id, level, facet_value, txn)?;
        let insertion_value_size = insertion_value.size as usize;

        let mut insertion_value_was_modified = false;
        let mut updated_value = insertion_value;

        if let ModificationResult::Insert = result {
            // if a key has been inserted in the sub-level, raise the value size.
            updated_value.size += 1;
            insertion_value_was_modified = true;
        } else if let ModificationResult::Remove { .. } = result {
            if updated_value.size <= 1 {
                // if the only remaining node is the one to delete,
                // delete the key instead and early return.
                let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
                assert!(is_deleted);
                return Ok(result);
            } else {
                // Reduce the value size
                updated_value.size -= 1;
                insertion_value_was_modified = true;
            }
        }

        let (insertion_key, insertion_key_modification) =
            if let ModificationResult::InPlace = result {
                (insertion_key, ModificationResult::InPlace)
            } else {
                // Inserting or deleting the facet value in the level below resulted in the creation
                // of a new key. Therefore, it may be the case that we need to modify the left bound of the
                // insertion key (see documentation of `find_insertion_key_value` for an example of when that
                // could happen).
                let mut new_insertion_key = insertion_key.clone();
                let mut key_modification = ModificationResult::InPlace;

                if let ModificationResult::Remove { next } | ModificationResult::Reduce { next } =
                    result
                {
                    // if the deleted facet_value is the left_bound of the current node,
                    // the left_bound should be updated, reducing the current node.
                    let reduced_range = facet_value == insertion_key.left_bound;
                    if reduced_range {
                        new_insertion_key.left_bound = next.clone().unwrap();
                        key_modification = ModificationResult::Reduce { next };
                    }
                } else if facet_value < insertion_key.left_bound.as_slice() {
                    // if the added facet_value is under the left_bound of the current node,
                    // the left_bound should be updated, expanding the current node.
                    new_insertion_key.left_bound = facet_value.to_vec();
                    key_modification = ModificationResult::Expand;
                }

                if matches!(
                    key_modification,
                    ModificationResult::Expand | ModificationResult::Reduce { .. }
                ) {
                    // if the node should be updated, delete it, it will be recreated using a new key later.
                    let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
                    assert!(is_deleted);
                }
                (new_insertion_key, key_modification)
            };

        if updated_value.size < self.max_group_size {
            // If there are docids to delete, trim them avoiding unexpected removal.
            if let Some(del_docids) = del_docids
                .map(|ids| {
                    self.trim_del_docids(
                        txn,
                        field_id,
                        level,
                        &insertion_key,
                        insertion_value_size,
                        ids,
                    )
                })
                .transpose()?
                .filter(|ids| !ids.is_empty())
            {
                updated_value.bitmap -= &*del_docids;
                insertion_value_was_modified = true;
            }

            if let Some(add_docids) = add_docids {
                updated_value.bitmap |= add_docids;
                insertion_value_was_modified = true;
            }

            if insertion_value_was_modified
                || matches!(
                    insertion_key_modification,
                    ModificationResult::Expand | ModificationResult::Reduce { .. }
                )
            {
                // if any modification occurred, insert it in the database.
                self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
                Ok(insertion_key_modification)
            } else {
                // this case is reachable when a docid is removed from a sub-level node but is still present in another one.
                // For instance, with a document containing 2 and 3: if 2 is removed, the docid should remain in the group node [1..4].
                Ok(ModificationResult::Nothing)
            }
        } else {
            // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
            // Therefore it must be split into two nodes.
            self.split_group(txn, field_id, level, insertion_key, updated_value)
        }
    }

    /// Modify the given facet value and corresponding document ids in the database.
    /// If no more document ids correspond to the facet value, delete it completely.
    ///
    /// ## Return
    /// Returns `true` if some tree nodes of the highest level have been removed or added, implying a potential
    /// addition or deletion of a facet level.
    /// Otherwise returns `false` if the tree nodes have been modified in place.
    pub fn modify(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<()> {
        if docids.is_empty() {
            return Ok(());
        add_docids: Option<&RoaringBitmap>,
        del_docids: Option<&RoaringBitmap>,
    ) -> Result<bool> {
        if add_docids.map_or(true, RoaringBitmap::is_empty)
            && del_docids.map_or(true, RoaringBitmap::is_empty)
        {
            return Ok(false);
        }
        let group_size = self.group_size;

        let highest_level = get_highest_level(txn, self.db, field_id)?;

        let result = self.insert_in_level(txn, field_id, highest_level, facet_value, docids)?;
        let result = self.modify_in_level(
            txn,
            field_id,
            highest_level,
            facet_value,
            add_docids,
            del_docids,
        )?;
        match result {
            InsertionResult::InPlace => return Ok(()),
            InsertionResult::Expand => return Ok(()),
            InsertionResult::Insert => {}
            ModificationResult::InPlace
            | ModificationResult::Expand
            | ModificationResult::Nothing
            | ModificationResult::Reduce { .. } => Ok(false),
            ModificationResult::Insert | ModificationResult::Remove { .. } => Ok(true),
        }
    }

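A hedged sketch of the caller contract (`inner`, `add` and `del` are assumed bindings):

// The boolean only signals that the highest level may need rebalancing;
// the caller decides when to actually run `add_or_delete_level`.
let level_size_changed = inner.modify(wtxn, field_id, left_bound, add.as_ref(), del.as_ref())?;
if level_size_changed {
    // deferred until the last modification for this field id, as in `execute` above
    facet_level_may_be_updated = true;
}
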
    // Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`.
    // If it has, we must build an additional level above it.

    /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
    /// If it has, we must build an additional level above it.
    /// Then check whether the highest level is under `min_level_size`.
    /// If it is, we must remove the complete level.
    pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn, field_id: u16) -> Result<()> {
        let highest_level = get_highest_level(txn, self.db, field_id)?;
        let mut highest_level_prefix = vec![];
        highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
        highest_level_prefix.push(highest_level);
@@ -386,14 +567,48 @@ impl FacetsUpdateIncrementalInner {
        let size_highest_level =
            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();

        if size_highest_level < self.group_size as usize * self.min_level_size as usize {
            return Ok(());
        if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
            self.add_level(txn, field_id, highest_level, &highest_level_prefix, size_highest_level)
        } else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
            self.delete_level(txn, &highest_level_prefix)
        } else {
            Ok(())
        }
    }

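A worked illustration with assumed parameters (for illustration only, `group_size = 4` and `min_level_size = 5`):

// 20 keys in the highest level: 20 >= 4 * 5, so a new level is built above it.
// 4 keys in a highest level > 0: 4 < 5, so the whole level is removed.
// Any size in between (5..=19) leaves the level structure untouched.
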
    /// Delete a level.
    fn delete_level(&self, txn: &mut RwTxn, highest_level_prefix: &[u8]) -> Result<()> {
        let mut to_delete = vec![];
        let mut iter =
            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
        for el in iter.by_ref() {
            let (k, _) = el?;
            to_delete.push(
                FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
                    .map_err(Error::Encoding)?
                    .into_owned(),
            );
        }
        drop(iter);
        for k in to_delete {
            self.db.delete(txn, &k.as_ref())?;
        }
        Ok(())
    }

    /// Build an additional level for the field id.
    fn add_level(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        highest_level: u8,
        highest_level_prefix: &[u8],
        size_highest_level: usize,
    ) -> Result<()> {
        let mut groups_iter = self
            .db
            .remap_types::<Bytes, FacetGroupValueCodec>()
            .prefix_iter(txn, &highest_level_prefix)?;
            .prefix_iter(txn, highest_level_prefix)?;

        let nbr_new_groups = size_highest_level / self.group_size as usize;
        let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@@ -402,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
        for _ in 0..nbr_new_groups {
            let mut first_key = None;
            let mut values = RoaringBitmap::new();
            for _ in 0..group_size {
            for _ in 0..self.group_size {
                let (key_bytes, value_i) = groups_iter.next().unwrap()?;
                let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
                    .map_err(Error::Encoding)?;
@@ -417,7 +632,7 @@ impl FacetsUpdateIncrementalInner {
                level: highest_level + 1,
                left_bound: first_key.unwrap().left_bound,
            };
            let value = FacetGroupValue { size: group_size, bitmap: values };
            let value = FacetGroupValue { size: self.group_size, bitmap: values };
            to_add.push((key.into_owned(), value));
        }
        // now we add the rest of the level, in case its size is > group_size * min_level_size
@@ -452,173 +667,6 @@ impl FacetsUpdateIncrementalInner {
        }
        Ok(())
    }

    /// Delete the given document id from the given facet value in the database, from level 0 to
    /// the given level.
    ///
    /// ## Return
    /// Returns the effect of removing the document id from the database on the given `level`.
    ///
    /// - `DeletionResult::InPlace` means that deleting the document id did not have
    /// an effect on the keys in that level.
    ///
    /// - `DeletionResult::Remove` means that deleting the document id resulted in a change in the
    /// number of keys in the level. For example, removing a document id from the facet value `3` could
    /// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
    /// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must
    /// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
    ///
    /// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the
    /// bounds of the keys of the level. For example, removing a document id from the facet value
    /// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
    /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
    /// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust
    /// its left bound as well.
    fn delete_in_level(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        level: u8,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<DeletionResult> {
        if level == 0 {
            return self.delete_in_level_0(txn, field_id, facet_value, docids);
        }
        let (deletion_key, mut bitmap) =
            self.find_insertion_key_value(field_id, level, facet_value, txn)?;

        let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docids)?;

        let mut decrease_size = false;
        let next_key = match result {
            DeletionResult::InPlace => {
                bitmap.bitmap -= docids;
                self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
                return Ok(DeletionResult::InPlace);
            }
            DeletionResult::Reduce { next } => next,
            DeletionResult::Remove { next } => {
                decrease_size = true;
                next
            }
        };
        // If either DeletionResult::Reduce or DeletionResult::Remove was returned,
        // then we may need to adjust the left_bound of the deletion key.

        // If DeletionResult::Remove was returned, then we need to decrease the group
        // size of the deletion key.
        let mut updated_value = bitmap;
        if decrease_size {
            updated_value.size -= 1;
        }

        if updated_value.size == 0 {
            self.db.delete(txn, &deletion_key.as_ref())?;
            Ok(DeletionResult::Remove { next: next_key })
        } else {
            let mut updated_deletion_key = deletion_key.clone();
            let reduced_range = facet_value == deletion_key.left_bound;
            if reduced_range {
                updated_deletion_key.left_bound = next_key.clone().unwrap();
            }
            updated_value.bitmap -= docids;
            let _ = self.db.delete(txn, &deletion_key.as_ref())?;
            self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
            if reduced_range {
                Ok(DeletionResult::Reduce { next: next_key })
            } else {
                Ok(DeletionResult::InPlace)
            }
        }
    }

    fn delete_in_level_0(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<DeletionResult> {
        let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
        let mut bitmap = self.db.get(txn, &key)?.unwrap().bitmap;
        bitmap -= docids;

        if bitmap.is_empty() {
            let mut next_key = None;
            if let Some((next, _)) =
                self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
            {
                if next.field_id == field_id && next.level == 0 {
                    next_key = Some(next.left_bound.to_vec());
                }
            }
            self.db.delete(txn, &key)?;
            Ok(DeletionResult::Remove { next: next_key })
        } else {
            self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?;
            Ok(DeletionResult::InPlace)
        }
    }

    pub fn delete(
        &self,
        txn: &mut RwTxn,
        field_id: u16,
        facet_value: &[u8],
        docids: &RoaringBitmap,
    ) -> Result<()> {
        if self
            .db
            .remap_data_type::<DecodeIgnore>()
            .get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })?
            .is_none()
        {
            return Ok(());
        }
        let highest_level = get_highest_level(txn, self.db, field_id)?;

        let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docids)?;
        match result {
            DeletionResult::InPlace => return Ok(()),
            DeletionResult::Reduce { .. } => return Ok(()),
            DeletionResult::Remove { .. } => {}
        }

        // if we removed a key from the highest level, its size may have fallen
        // below `min_level_size`, in which case we need to remove the entire level

        let mut highest_level_prefix = vec![];
        highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
        highest_level_prefix.push(highest_level);

        if highest_level == 0
            || self
                .db
                .remap_types::<Bytes, Bytes>()
                .prefix_iter(txn, &highest_level_prefix)?
                .count()
                >= self.min_level_size as usize
        {
            return Ok(());
        }
        let mut to_delete = vec![];
        let mut iter =
            self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
        for el in iter.by_ref() {
            let (k, _) = el?;
            to_delete.push(
                FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
                    .map_err(Error::Encoding)?
                    .into_owned(),
            );
        }
        drop(iter);
        for k in to_delete {
            self.db.delete(txn, &k.as_ref())?;
        }
        Ok(())
    }
}

impl<'a> FacetGroupKey<&'a [u8]> {

@@ -149,7 +149,7 @@ impl<'i> FacetsUpdate<'i> {
        self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;

        // See self::comparison_bench::benchmark_facet_indexing
        if self.data_size >= (self.database.len(wtxn)? / 50) {
        if self.data_size >= (self.database.len(wtxn)? / 500) {
            let field_ids =
                self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
            let bulk_update = FacetsUpdateBulk::new(
@@ -429,7 +429,8 @@ pub(crate) mod test_helpers {
                max_group_size: self.max_group_size.get(),
            };
            let key_bytes = BoundCodec::bytes_encode(key).unwrap();
            update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
            update.modify(wtxn, field_id, &key_bytes, Some(docids), None).unwrap();
            update.add_or_delete_level(wtxn, field_id).unwrap();
        }
        pub fn delete_single_docid<'a>(
            &self,
@@ -455,7 +456,8 @@ pub(crate) mod test_helpers {
                max_group_size: self.max_group_size.get(),
            };
            let key_bytes = BoundCodec::bytes_encode(key).unwrap();
            update.delete(wtxn, field_id, &key_bytes, docids).unwrap();
            update.modify(wtxn, field_id, &key_bytes, None, Some(docids)).unwrap();
            update.add_or_delete_level(wtxn, field_id).unwrap();
        }

        pub fn bulk_insert<'a, 'b>(

@@ -210,8 +210,7 @@ fn run_extraction_task<FE, FS, M>(
    let current_span = tracing::Span::current();

    rayon::spawn(move || {
        let child_span =
            tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
        let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
        let _entered = child_span.enter();
        puffin::profile_scope!("extract_multiple_chunks", name);
        match extract_fn(chunk, indexer) {

@@ -284,7 +284,7 @@ where
    #[tracing::instrument(
        level = "trace",
        skip_all,
        target = "profile::indexing::details",
        target = "indexing::details",
        name = "index_documents_raw"
    )]
    pub fn execute_raw(self, output: TransformOutput) -> Result<u64>

@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
            is_merged_database = true;
        }
        TypedChunk::FieldIdFacetIsEmptyDocids(_) => {
            let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
            let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
            let _entered = span.enter();

            let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);

@@ -1032,6 +1032,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
    {
        self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;

        let existing_fields: HashSet<_> = self
            .index
            .field_distribution(self.wtxn)?
            .into_iter()
            .filter_map(|(field, count)| (count != 0).then_some(field))
            .collect();

        let old_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
        let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;

@@ -1052,7 +1059,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
        // index new fields as facets. It means that the distinct attribute,
        // an Asc/Desc criterion or a filtered attribute has been added or removed.
        let new_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
        let faceted_updated = old_faceted_fields != new_faceted_fields;
        let faceted_updated =
            (&existing_fields - &old_faceted_fields) != (&existing_fields - &new_faceted_fields);

        let stop_words_updated = self.update_stop_words()?;
        let non_separator_tokens_updated = self.update_non_separator_tokens()?;

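Restricting the comparison to `existing_fields` is what avoids spurious reindexing; a worked sketch with hypothetical field names:

use std::collections::HashSet;

let existing: HashSet<String> = ["genres".to_string()].into();
let old_faceted: HashSet<String> = ["genres".to_string()].into();
let new_faceted: HashSet<String> = ["genres".to_string(), "color".to_string()].into();

// `color` does not exist in any document yet, so the effective faceted set is
// unchanged and no reindexing is triggered.
assert_eq!(&existing - &old_faceted, &existing - &new_faceted);
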
@@ -59,8 +59,8 @@ pub enum EmbedErrorKind {
    OpenAiAuth(OpenAiError),
    #[error("sent too many requests to OpenAI: {0}")]
    OpenAiTooManyRequests(OpenAiError),
    #[error("received internal error from OpenAI: {0}")]
    OpenAiInternalServerError(OpenAiError),
    #[error("received internal error from OpenAI: {0:?}")]
    OpenAiInternalServerError(Option<OpenAiError>),
    #[error("sent too many tokens in a request to OpenAI: {0}")]
    OpenAiTooManyTokens(OpenAiError),
    #[error("received unhandled HTTP status code {0} from OpenAI")]
@@ -106,7 +106,7 @@ impl EmbedError {
        Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime }
    }

    pub(crate) fn openai_internal_server_error(inner: OpenAiError) -> EmbedError {
    pub(crate) fn openai_internal_server_error(inner: Option<OpenAiError>) -> EmbedError {
        Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime }
    }


@@ -261,3 +261,7 @@ impl DistributionShift {
        score
    }
}

pub const fn is_cuda_enabled() -> bool {
    cfg!(feature = "cuda")
}

@@ -178,6 +178,8 @@ impl Embedder {
                retry.into_duration(attempt)
            }
        }?;

        let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
        tracing::warn!(
            "Attempt #{}, retrying after {}ms.",
            attempt,
@@ -220,24 +222,12 @@ impl Embedder {
                    error_response.error,
                )));
            }
            StatusCode::INTERNAL_SERVER_ERROR => {
                let error_response: OpenAiErrorResponse = response
                    .json()
                    .await
                    .map_err(EmbedError::openai_unexpected)
                    .map_err(Retry::retry_later)?;
            StatusCode::INTERNAL_SERVER_ERROR
            | StatusCode::BAD_GATEWAY
            | StatusCode::SERVICE_UNAVAILABLE => {
                let error_response: Result<OpenAiErrorResponse, _> = response.json().await;
                return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
                    error_response.error,
                )));
            }
            StatusCode::SERVICE_UNAVAILABLE => {
                let error_response: OpenAiErrorResponse = response
                    .json()
                    .await
                    .map_err(EmbedError::openai_unexpected)
                    .map_err(Retry::retry_later)?;
                return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
                    error_response.error,
                    error_response.ok().map(|error_response| error_response.error),
                )));
            }
            StatusCode::BAD_REQUEST => {
@@ -248,14 +238,14 @@ impl Embedder {
                    .map_err(EmbedError::openai_unexpected)
                    .map_err(Retry::retry_later)?;

                tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
                tracing::warn!("OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your prompt.");

                return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
                    error_response.error,
                )));
            }
            code => {
                return Err(Retry::give_up(EmbedError::openai_unhandled_status_code(
                return Err(Retry::retry_later(EmbedError::openai_unhandled_status_code(
                    code.as_u16(),
                )));
            }

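Recap of the retry decisions visible in this hunk (a summary, not exhaustive; the earlier arms of the match are not shown):

// 500 | 502 | 503 -> Retry::retry_later, decoding the error body on a best-effort basis
// 400             -> Retry::retry_tokenized, falling back to the tokenized request
// other statuses  -> Retry::retry_later (previously Retry::give_up)
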
@@ -1,4 +1,5 @@
use std::collections::{BTreeMap, HashMap};
use std::ops::Range;
use std::time::Duration;

use serde::{Deserialize, Serialize};
@@ -16,6 +17,51 @@ enum SpanStatus {
pub struct CallStats {
    pub call_count: usize,
    pub time: u64,
    pub self_time: u64,
}

#[derive(Debug, Default)]
pub struct SelfTime {
    child_ranges: Vec<Range<Duration>>,
}

impl SelfTime {
    pub fn new() -> Self {
        Default::default()
    }

    pub fn add_child_range(&mut self, child_range: Range<Duration>) {
        self.child_ranges.push(child_range)
    }

    pub fn self_duration(&mut self, self_range: Range<Duration>) -> Duration {
        if self.child_ranges.is_empty() {
            return self_range.end - self_range.start;
        }

        // by sorting child ranges by their start time,
        // we make sure that no child will start before the last one we visited.
        self.child_ranges
            .sort_by(|left, right| left.start.cmp(&right.start).then(left.end.cmp(&right.end)));
        // self duration computed by adding all the segments where the span is not executing a child
        let mut self_duration = Duration::from_nanos(0);

        // last point in time where we are certain that this span was not executing a child.
        let mut committed_point = self_range.start;

        for child_range in &self.child_ranges {
            if child_range.start > committed_point {
                // we add to the self duration the gap between the end of the latest child and the beginning of the next one
                self_duration += child_range.start - committed_point;
            }
            if committed_point < child_range.end {
                // then we set ourselves to the end of the latest child
                committed_point = child_range.end;
            }
        }

        self_duration
    }
}

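A worked example of `self_duration` with illustrative values; the loop above sums the gaps during which no child is running:

use std::time::Duration;

let ms = Duration::from_millis;
let mut self_time = SelfTime::new();
self_time.add_child_range(ms(1)..ms(3));
self_time.add_child_range(ms(5)..ms(7));
// Gaps with no child running: 0..1 (1ms) and 3..5 (2ms).
assert_eq!(self_time.self_duration(ms(0)..ms(10)), ms(3));
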
pub fn to_call_stats<R: std::io::Read>(
@@ -23,6 +69,9 @@ pub fn to_call_stats<R: std::io::Read>(
) -> Result<BTreeMap<String, CallStats>, Error> {
    let mut calls = HashMap::new();
    let mut spans = HashMap::new();
    let mut last_point = Duration::from_nanos(0);
    let mut first_point = None;
    let mut total_self_time = SelfTime::new();
    for entry in trace {
        let entry = entry?;
        match entry {
@@ -31,10 +80,11 @@ pub fn to_call_stats<R: std::io::Read>(
            }
            Entry::NewThread(_) => {}
            Entry::NewSpan(span) => {
                spans.insert(span.id, (span, SpanStatus::Outside));
                spans.insert(span.id, (span, SpanStatus::Outside, SelfTime::new()));
            }
            Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
                let (_, status) = spans.get_mut(&id).unwrap();
                first_point.get_or_insert(time);
                let (_, status, _) = spans.get_mut(&id).unwrap();

                let SpanStatus::Outside = status else {
                    continue;
@@ -43,18 +93,32 @@ pub fn to_call_stats<R: std::io::Read>(
                *status = SpanStatus::Inside(time);
            }
            Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
                let (span, status) = spans.get_mut(&id).unwrap();
                let (span, status, self_time) = spans.get_mut(&id).unwrap();

                let SpanStatus::Inside(begin) = status else {
                    continue;
                };
                let begin = *begin;

                if last_point < end {
                    last_point = end;
                }

                *status = SpanStatus::Outside;

                let self_range = begin..end;

                let self_duration = self_time.self_duration(self_range.clone());
                *self_time = SelfTime::new();

                let span = *span;
                if let Some(parent_id) = span.parent_id {
                    let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
                    parent_self_time.add_child_range(self_range.clone())
                }
                total_self_time.add_child_range(self_range);
                let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
                call_list.push(end - begin);
                call_list.push((end - begin, self_duration));
            }
            Entry::SpanClose(SpanClose { id, time: _ }) => {
                spans.remove(&id);
@@ -63,17 +127,31 @@ pub fn to_call_stats<R: std::io::Read>(
        }
    }

    let total_self_time = first_point
        .map(|first_point| (first_point, total_self_time.self_duration(first_point..last_point)));

    Ok(calls
        .into_iter()
        .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
        .chain(total_self_time.map(|(first_point, total_self_time)| {
            (
                "::meta::total".to_string(),
                CallStats {
                    call_count: 1,
                    time: (last_point - first_point).as_nanos() as u64,
                    self_time: total_self_time.as_nanos() as u64,
                },
            )
        }))
        .collect())
}

fn site_to_string(call_site: NewCallsite) -> String {
    format!("{}::{}", call_site.target, call_site.name)
}
fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
fn calls_to_stats(calls: Vec<(Duration, Duration)>) -> CallStats {
    let nb = calls.len();
    let sum: Duration = calls.iter().sum();
    CallStats { call_count: nb, time: sum.as_nanos() as u64 }
    let sum: Duration = calls.iter().map(|(total, _)| total).sum();
    let self_sum: Duration = calls.iter().map(|(_, self_duration)| self_duration).sum();
    CallStats { call_count: nb, time: sum.as_nanos() as u64, self_time: self_sum.as_nanos() as u64 }
}

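Illustrative aggregation with assumed values: two calls taking 5ms and 3ms in total, of which 2ms and 1ms were spent outside of children, yield:

// CallStats { call_count: 2, time: 8_000_000, self_time: 3_000_000 } // nanoseconds
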
164
workloads/hackernews.json
Normal file
@@ -0,0 +1,164 @@
{
    "name": "hackernews.ndjson_1M",
    "run_count": 3,
    "extra_cli_args": [],
    "assets": {
        "hackernews-100_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
            "sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
        },
        "hackernews-200_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
            "sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
        },
        "hackernews-300_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
            "sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
        },
        "hackernews-400_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
            "sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
        },
        "hackernews-500_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
            "sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
        },
        "hackernews-600_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
            "sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
        },
        "hackernews-700_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
            "sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
        },
        "hackernews-800_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
            "sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
        },
        "hackernews-900_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
            "sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
        },
        "hackernews-1_000_000.ndjson": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
            "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
        }
    },
    "commands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "displayedAttributes": [
                        "title",
                        "by",
                        "score",
                        "time"
                    ],
                    "searchableAttributes": [
                        "title"
                    ],
                    "filterableAttributes": [
                        "by"
                    ],
                    "sortableAttributes": [
                        "score",
                        "time"
                    ]
                }
            },
            "synchronous": "DontWait"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-100_000.ndjson"
            },
            "synchronous": "WaitForTask"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-200_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-300_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-400_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-500_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-600_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-700_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-800_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-900_000.ndjson"
            },
            "synchronous": "WaitForResponse"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "hackernews-1_000_000.ndjson"
            },
            "synchronous": "WaitForTask"
        }
    ]
}
44
workloads/movies-nothreads.json
Normal file
@@ -0,0 +1,44 @@
{
    "name": "movies.json,no-threads",
    "run_count": 2,
    "extra_cli_args": [
        "--max-indexing-threads=1"
    ],
    "assets": {
        "movies.json": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
            "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
        }
    },
    "commands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "searchableAttributes": [
                        "title",
                        "overview"
                    ],
                    "filterableAttributes": [
                        "genres",
                        "release_date"
                    ],
                    "sortableAttributes": [
                        "release_date"
                    ]
                }
            },
            "synchronous": "DontWait"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "movies.json"
            },
            "synchronous": "WaitForTask"
        }
    ]
}
42
workloads/movies.json
Normal file
@@ -0,0 +1,42 @@
{
    "name": "movies.json",
    "run_count": 10,
    "extra_cli_args": [],
    "assets": {
        "movies.json": {
            "local_location": null,
            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
            "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
        }
    },
    "commands": [
        {
            "route": "indexes/movies/settings",
            "method": "PATCH",
            "body": {
                "inline": {
                    "searchableAttributes": [
                        "title",
                        "overview"
                    ],
                    "filterableAttributes": [
                        "genres",
                        "release_date"
                    ],
                    "sortableAttributes": [
                        "release_date"
                    ]
                }
            },
            "synchronous": "DontWait"
        },
        {
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": {
                "asset": "movies.json"
            },
            "synchronous": "WaitForTask"
        }
    ]
}
@@ -11,5 +11,34 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.79"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1"
clap = { version = "4.4.14", features = ["derive"] }
futures-core = "0.3.30"
futures-util = "0.3.30"
reqwest = { version = "0.11.23", features = [
    "stream",
    "json",
    "rustls-tls",
], default_features = false }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
sha2 = "0.10.8"
sysinfo = "0.30.5"
time = { version = "0.3.32", features = [
    "serde",
    "serde-human-readable",
    "macros",
] }
tokio = { version = "1.35.1", features = [
    "rt",
    "net",
    "time",
    "process",
    "signal",
] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.7.0", features = ["v7", "serde"] }

250
xtask/src/bench/assets.rs
Normal file
@@ -0,0 +1,250 @@
use std::collections::BTreeMap;
use std::io::{Read as _, Seek as _, Write as _};

use anyhow::{bail, Context};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use sha2::Digest;

use super::client::Client;

#[derive(Deserialize, Clone)]
pub struct Asset {
    pub local_location: Option<String>,
    pub remote_location: Option<String>,
    #[serde(default)]
    pub format: AssetFormat,
    pub sha256: Option<String>,
}

#[derive(Deserialize, Default, Copy, Clone)]
pub enum AssetFormat {
    #[default]
    Auto,
    Json,
    NdJson,
    Raw,
}

impl AssetFormat {
    pub fn to_content_type(self, filename: &str) -> &'static str {
        match self {
            AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
            AssetFormat::Json => "application/json",
            AssetFormat::NdJson => "application/x-ndjson",
            AssetFormat::Raw => "application/octet-stream",
        }
    }

    fn auto_detect(filename: &str) -> Self {
        let path = std::path::Path::new(filename);
        match path.extension().and_then(|extension| extension.to_str()) {
            Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json,
            Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson,
            extension => {
                tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning.");
                AssetFormat::Raw
            }
        }
    }
}

pub fn fetch_asset(
    name: &str,
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<(std::fs::File, AssetFormat)> {
    let asset =
        assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?;
    let filename = if let Some(local_filename) = &asset.local_location {
        local_filename.clone()
    } else {
        format!("{asset_folder}/{name}")
    };

    Ok((
        std::fs::File::open(&filename)
            .with_context(|| format!("could not open asset '{name}' at '{filename}'"))?,
        asset.format,
    ))
}

#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))]
pub async fn fetch_assets(
    client: &Client,
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    let mut download_tasks = tokio::task::JoinSet::new();
    for (name, asset) in assets {
        // trying local
        if let Some(local) = &asset.local_location {
            match std::fs::File::open(local) {
                Ok(file) => {
                    if check_sha256(name, asset, file)? {
                        continue;
                    } else {
                        tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store");
                    }
                }
                Err(error) => match error.kind() {
                    std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
                    }
                    _ => tracing::warn!(
                        error = &error as &dyn std::error::Error,
                        "error checking local resource, skipping to asset store"
                    ),
                },
            }
        }

        // checking asset store
        let store_filename = format!("{}/{}", asset_folder, name);

        match std::fs::File::open(&store_filename) {
            Ok(file) => {
                if check_sha256(name, asset, file)? {
                    continue;
                } else {
                    tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method");
                }
            }
            Err(error) => match error.kind() {
                std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
                }
                _ => tracing::warn!(
                    error = &error as &dyn std::error::Error,
                    "error checking resource in store, skipping to remote method"
                ),
            },
        }

        // downloading remote
        match &asset.remote_location {
            Some(location) => {
                std::fs::create_dir_all(asset_folder).with_context(|| {
                    format!("could not create asset folder at {asset_folder}")
                })?;
                download_tasks.spawn({
                    let client = client.clone();
                    let name = name.to_string();
                    let location = location.to_string();
                    let store_filename = store_filename.clone();
                    let asset = asset.clone();
                    download_asset(client, name, asset, location, store_filename)
                });
            }
            None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"),
        }
    }

    while let Some(res) = download_tasks.join_next().await {
        res.context("download task panicked")?.context("download task failed")?;
    }

    Ok(())
}

fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result<bool> {
    let mut bytes = Vec::new();
    file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?;
    let mut file_hash = sha2::Sha256::new();
    file_hash.update(&bytes);
    let file_hash = file_hash.finalize();
    let file_hash = format!("{:x}", file_hash);
    tracing::debug!(hash = file_hash, "hashed local file");

    Ok(match &asset.sha256 {
        Some(hash) => {
            tracing::debug!(hash, "hash from workload");
            if hash.to_ascii_lowercase() == file_hash {
                true
            } else {
                tracing::warn!(
                    file_hash,
                    asset_hash = hash.to_ascii_lowercase(),
                    "hashes don't match"
                );
                false
            }
        }
        None => {
            tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to the workload file");
            true
        }
    })
}

#[tracing::instrument(skip(client, asset, name), fields(asset = name))]
async fn download_asset(
    client: Client,
    name: String,
    asset: Asset,
    src: String,
    dest_filename: String,
) -> anyhow::Result<()> {
    let context = || format!("failure downloading asset {name} from {src}");

    let response = client.get(&src).send().await.with_context(context)?;

    let file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .write(true)
        .read(true)
        .open(&dest_filename)
        .with_context(|| format!("creating destination file {dest_filename}"))
        .with_context(context)?;

    let mut dest = std::io::BufWriter::new(
        file.try_clone().context("cloning I/O handle").with_context(context)?,
    );

    let total_len: Option<u64> = response
        .headers()
        .get(reqwest::header::CONTENT_LENGTH)
        .and_then(|value| value.to_str().ok())
        .and_then(|value| value.parse().ok());

    let progress = tokio::spawn({
        let name = name.clone();
        async move {
            loop {
                match file.metadata().context("could not get file metadata") {
                    Ok(metadata) => {
                        let len = metadata.len();
                        tracing::info!(
                            asset = name,
                            downloaded_bytes = len,
                            total_bytes = total_len,
                            "asset download in progress"
                        );
                    }
                    Err(error) => {
                        tracing::warn!(%error, "could not get file metadata");
                    }
                }
                tokio::time::sleep(std::time::Duration::from_secs(60)).await;
            }
        }
    });

    let writing_context = || format!("while writing to destination file at {dest_filename}");

    let mut response = response.bytes_stream();

    while let Some(bytes) =
        response.try_next().await.context("while downloading file").with_context(context)?
    {
        dest.write_all(&bytes).with_context(writing_context).with_context(context)?;
    }

    progress.abort();

    let mut file = dest.into_inner().with_context(writing_context).with_context(context)?;

    file.rewind().context("while rewinding asset file")?;

    if !check_sha256(&name, &asset, file)? {
        bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}")
    }

    Ok(())
}
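The lookup order implemented above is: the explicit `local_location`, then the asset store at `{asset_folder}/{name}`, then a download from `remote_location`, each gated by the sha256 check. A hypothetical asset entry exercising all fields, including an explicit `format` (the accepted serde variant names are `Auto`, `Json`, `NdJson`, `Raw`), might look like this in a workload file (both locations illustrative):

"assets": {
    "comments.ndjson": {
        "local_location": "/datasets/comments.ndjson",
        "remote_location": "https://example.com/datasets/comments.ndjson",
        "format": "NdJson",
        "sha256": null
    }
}

With `sha256: null`, the file is accepted but `check_sha256` logs the computed hash and a warning asking for it to be added.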
80
xtask/src/bench/client.rs
Normal file
@@ -0,0 +1,80 @@
use anyhow::Context;
use serde::Deserialize;

#[derive(Debug, Clone)]
pub struct Client {
    base_url: Option<String>,
    client: reqwest::Client,
}

impl Client {
    pub fn new(
        base_url: Option<String>,
        api_key: Option<&str>,
        timeout: Option<std::time::Duration>,
    ) -> anyhow::Result<Self> {
        let mut headers = reqwest::header::HeaderMap::new();
        if let Some(api_key) = api_key {
            headers.append(
                reqwest::header::AUTHORIZATION,
                reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}"))
                    .context("Invalid authorization header")?,
            );
        }

        let client = reqwest::ClientBuilder::new().default_headers(headers);
        let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client };
        let client = client.build()?;
        Ok(Self { base_url, client })
    }

    pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder {
        if let Some(base_url) = &self.base_url {
            if route.is_empty() {
                self.client.request(method, base_url)
            } else {
                self.client.request(method, format!("{}/{}", base_url, route))
            }
        } else {
            self.client.request(method, route)
        }
    }

    pub fn get(&self, route: &str) -> reqwest::RequestBuilder {
        self.request(reqwest::Method::GET, route)
    }

    pub fn put(&self, route: &str) -> reqwest::RequestBuilder {
        self.request(reqwest::Method::PUT, route)
    }

    pub fn post(&self, route: &str) -> reqwest::RequestBuilder {
        self.request(reqwest::Method::POST, route)
    }

    pub fn delete(&self, route: &str) -> reqwest::RequestBuilder {
        self.request(reqwest::Method::DELETE, route)
    }
}

#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum Method {
    Get,
    Post,
    Patch,
    Delete,
    Put,
}

impl From<Method> for reqwest::Method {
    fn from(value: Method) -> Self {
        match value {
            Method::Get => Self::GET,
            Method::Post => Self::POST,
            Method::Patch => Self::PATCH,
            Method::Delete => Self::DELETE,
            Method::Put => Self::PUT,
        }
    }
}
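A minimal sketch of how this wrapper composes, mirroring the Meilisearch client built in mod.rs below (the URL and timeout are the values used there; the wrapping function is hypothetical):

async fn check_health() -> anyhow::Result<()> {
    // Routes are joined as "{base_url}/{route}", so get("health") hits
    // http://127.0.0.1:7700/health. Passing None for the API key means
    // no Authorization header is attached.
    let meili_client = Client::new(
        Some("http://127.0.0.1:7700".into()),
        None,
        Some(std::time::Duration::from_secs(60)),
    )?;
    let response = meili_client.get("health").send().await?;
    anyhow::ensure!(response.status().is_success(), "unexpected status");
    Ok(())
}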
194
xtask/src/bench/command.rs
Normal file
@@ -0,0 +1,194 @@
use std::collections::BTreeMap;
use std::fmt::Display;
use std::io::Read as _;

use anyhow::{bail, Context as _};
use serde::Deserialize;

use super::assets::{fetch_asset, Asset};
use super::client::{Client, Method};

#[derive(Clone, Deserialize)]
pub struct Command {
    pub route: String,
    pub method: Method,
    #[serde(default)]
    pub body: Body,
    #[serde(default)]
    pub synchronous: SyncMode,
}

#[derive(Default, Clone, Deserialize)]
#[serde(untagged)]
pub enum Body {
    Inline {
        inline: serde_json::Value,
    },
    Asset {
        asset: String,
    },
    #[default]
    Empty,
}

impl Body {
    pub fn get(
        self,
        assets: &BTreeMap<String, Asset>,
        asset_folder: &str,
    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
        Ok(match self {
            Body::Inline { inline: body } => Some((
                serde_json::to_vec(&body)
                    .context("serializing to bytes")
                    .context("while getting inline body")?,
                "application/json",
            )),
            Body::Asset { asset: name } => Some({
                let context = || format!("while getting body from asset '{name}'");
                let (mut file, format) =
                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
                let mut buf = Vec::new();
                file.read_to_end(&mut buf).with_context(context)?;
                (buf, format.to_content_type(&name))
            }),
            Body::Empty => None,
        })
    }
}

impl Display for Command {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
    }
}

#[derive(Default, Debug, Clone, Copy, Deserialize)]
pub enum SyncMode {
    DontWait,
    #[default]
    WaitForResponse,
    WaitForTask,
}

pub async fn run_batch(
    client: &Client,
    batch: &[Command],
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    // a batch only ends on a synchronous command, so the sync mode of the
    // last command decides how the whole batch waits
    let [.., last] = batch else { return Ok(()) };
    let sync = last.synchronous;

    let mut tasks = tokio::task::JoinSet::new();

    for command in batch {
        // FIXME: you probably don't want to copy assets every time here
        tasks.spawn({
            let client = client.clone();
            let command = command.clone();
            let assets = assets.clone();
            let asset_folder = asset_folder.to_owned();

            async move { run(client, command, &assets, &asset_folder).await }
        });
    }

    while let Some(result) = tasks.join_next().await {
        result
            .context("panicked while executing command")?
            .context("error while executing command")?;
    }

    match sync {
        SyncMode::DontWait => {}
        SyncMode::WaitForResponse => {}
        SyncMode::WaitForTask => wait_for_tasks(client).await?,
    }

    Ok(())
}

async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
    loop {
        let response = client
            .get("tasks?statuses=enqueued,processing")
            .send()
            .await
            .context("could not wait for tasks")?;
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response to JSON")
            .context("could not wait for tasks")?;
        match response.get("total") {
            Some(serde_json::Value::Number(number)) => {
                let number = number.as_u64().with_context(|| {
                    format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
                })?;
                if number == 0 {
                    break;
                } else {
                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                    continue;
                }
            }
            Some(thing_else) => {
                bail!("waiting for tasks: could not parse 'total' as a number, got '{thing_else}'")
            }
            None => {
                bail!("waiting for tasks: expected response to contain 'total', got '{response}'")
            }
        }
    }
    Ok(())
}

#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
pub async fn run(
    client: Client,
    mut command: Command,
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    // std::mem::take the body here to leave an empty body in its place,
    // so that `command` is not partially moved out and can still be displayed
    let body = std::mem::take(&mut command.body)
        .get(assets, asset_folder)
        .with_context(|| format!("while getting body for command {command}"))?;

    let request = client.request(command.method.into(), &command.route);

    let request = if let Some((body, content_type)) = body {
        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
    } else {
        request
    };

    let response =
        request.send().await.with_context(|| format!("error sending command: {}", command))?;

    let code = response.status();
    if code.is_client_error() {
        tracing::error!(%command, %code, "error in workload file");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing error in workload file when sending command")?;
        bail!("error in workload file: server responded with error code {code} and '{response}'")
    } else if code.is_server_error() {
        tracing::error!(%command, %code, "server error");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("parsing server error when sending command")?;
        bail!("server error: server responded with error code {code} and '{response}'")
    }

    Ok(())
}
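Because `Body` is `#[serde(untagged)]` with `Empty` as the default, a workload command can carry its payload in three ways; a sketch (the search route is illustrative, the other two forms appear in the workload files above):

{ "route": "indexes/movies/documents", "method": "POST", "body": { "asset": "movies.json" } }
{ "route": "indexes/movies/search", "method": "POST", "body": { "inline": { "q": "batman" } } }
{ "route": "health", "method": "GET" }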
167
xtask/src/bench/dashboard.rs
Normal file
@@ -0,0 +1,167 @@
use std::collections::BTreeMap;

use anyhow::{bail, Context};
use serde_json::json;
use tokio::signal::ctrl_c;
use tokio::task::AbortHandle;
use tracing_trace::processor::span_stats::CallStats;
use uuid::Uuid;

use super::client::Client;
use super::env_info;
use super::workload::Workload;

pub async fn cancel_on_ctrl_c(
    invocation_uuid: Uuid,
    dashboard_client: Client,
    abort_handle: AbortHandle,
) {
    tracing::info!("press Ctrl-C to cancel the invocation");
    match ctrl_c().await {
        Ok(()) => {
            tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
            mark_as_failed(dashboard_client, invocation_uuid, None).await;
            abort_handle.abort();
        }
        Err(error) => tracing::warn!(
            error = &error as &dyn std::error::Error,
            "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
        ),
    }
}

pub async fn mark_as_failed(
    dashboard_client: Client,
    invocation_uuid: Uuid,
    failure_reason: Option<String>,
) {
    let response = dashboard_client
        .post("cancel-invocation")
        .json(&json!({
            "invocation_uuid": invocation_uuid,
            "failure_reason": failure_reason,
        }))
        .send()
        .await;
    let response = match response {
        Ok(response) => response,
        Err(response_error) => {
            tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
            return;
        }
    };

    if !response.status().is_success() {
        tracing::error!(
            %invocation_uuid,
            "could not mark invocation as failed: {}",
            response.text().await.unwrap()
        );
        return;
    }
    tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
}

pub async fn send_machine_info(
    dashboard_client: &Client,
    env: &env_info::Environment,
) -> anyhow::Result<()> {
    let response = dashboard_client
        .put("machine")
        .json(&json!({"hostname": env.hostname}))
        .send()
        .await
        .context("sending machine information")?;
    if !response.status().is_success() {
        bail!(
            "could not send machine information: {} {}",
            response.status(),
            response.text().await.unwrap_or_else(|_| "unknown".into())
        );
    }
    Ok(())
}

pub async fn create_invocation(
    dashboard_client: &Client,
    build_info: build_info::BuildInfo,
    commit_message: &str,
    env: env_info::Environment,
    max_workloads: usize,
    reason: Option<&str>,
) -> anyhow::Result<Uuid> {
    let response = dashboard_client
        .put("invocation")
        .json(&json!({
            "commit": {
                "sha1": build_info.commit_sha1,
                "message": commit_message,
                "commit_date": build_info.commit_timestamp,
                "branch": build_info.branch,
                "tag": build_info.describe.and_then(|describe| describe.as_tag()),
            },
            "machine_hostname": env.hostname,
            "max_workloads": max_workloads,
            "reason": reason
        }))
        .send()
        .await
        .context("sending invocation")?;
    if !response.status().is_success() {
        bail!(
            "could not send new invocation: {}",
            response.text().await.unwrap_or_else(|_| "unknown".into())
        );
    }
    let invocation_uuid: Uuid =
        response.json().await.context("could not deserialize invocation response as JSON")?;
    Ok(invocation_uuid)
}

pub async fn create_workload(
    dashboard_client: &Client,
    invocation_uuid: Uuid,
    workload: &Workload,
) -> anyhow::Result<Uuid> {
    let response = dashboard_client
        .put("workload")
        .json(&json!({
            "invocation_uuid": invocation_uuid,
            "name": &workload.name,
            "max_runs": workload.run_count,
        }))
        .send()
        .await
        .context("could not create new workload")?;

    if !response.status().is_success() {
        bail!("creating new workload failed: {}", response.text().await.unwrap())
    }

    let workload_uuid: Uuid =
        response.json().await.context("could not deserialize JSON as UUID")?;
    Ok(workload_uuid)
}

pub async fn create_run(
    dashboard_client: Client,
    workload_uuid: Uuid,
    report: &BTreeMap<String, CallStats>,
) -> anyhow::Result<()> {
    let response = dashboard_client
        .put("run")
        .json(&json!({
            "workload_uuid": workload_uuid,
            "data": report
        }))
        .send()
        .await
        .context("sending new run")?;
    if !response.status().is_success() {
        bail!(
            "sending new run failed: {}",
            response.text().await.unwrap_or_else(|_| "unknown".into())
        )
    }
    Ok(())
}
75
xtask/src/bench/env_info.rs
Normal file
@@ -0,0 +1,75 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Environment {
    pub hostname: Option<String>,
    pub cpu: String,

    /// Advertised or nominal clock speed in Hertz.
    pub clock_speed: u64,

    /// Total number of bytes of memory provided by the system.
    pub memory: u64,
    pub os_type: String,
    pub software: Vec<VersionInfo>,

    pub user_name: String,

    /// Set to true when the data was gathered by a manual run,
    /// possibly on a developer machine, instead of the usual benchmark server.
    pub manual_run: bool,
}

impl Environment {
    pub fn generate_from_current_config() -> Self {
        use sysinfo::System;

        let unknown_string = String::from("Unknown");
        let mut system = System::new();
        system.refresh_cpu();
        system.refresh_cpu_frequency();
        system.refresh_memory();

        let (cpu, frequency) = match system.cpus().first() {
            Some(cpu) => (
                format!("{} @ {:.2}GHz", cpu.brand(), cpu.frequency() as f64 / 1000.0),
                cpu.frequency() * 1_000_000,
            ),
            None => (unknown_string.clone(), 0),
        };

        let mut software = Vec::new();
        if let Some(distribution) = System::name() {
            software
                .push(VersionInfo { name: distribution, version: String::from("distribution") });
        }
        if let Some(kernel) = System::kernel_version() {
            software.push(VersionInfo { name: kernel, version: String::from("kernel") });
        }
        if let Some(os) = System::os_version() {
            software.push(VersionInfo { name: os, version: String::from("kernel-release") });
        }
        if let Some(arch) = System::cpu_arch() {
            software.push(VersionInfo { name: arch, version: String::from("arch") });
        }

        Self {
            hostname: System::host_name(),
            cpu,
            clock_speed: frequency,
            memory: system.total_memory(),
            os_type: System::long_os_version().unwrap_or(unknown_string.clone()),
            user_name: System::name().unwrap_or(unknown_string.clone()),
            manual_run: false,
            software,
        }
    }
}

#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct VersionInfo {
    pub name: String,
    pub version: String,
}
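Given the `camelCase` rename, the environment serializes with keys like `clockSpeed` and `manualRun`; a hypothetical payload as sent to the dashboard (all values illustrative):

{
    "hostname": "bench-machine",
    "cpu": "AMD Ryzen 9 5950X @ 3.40GHz",
    "clockSpeed": 3400000000,
    "memory": 68719476736,
    "osType": "Linux (Ubuntu 22.04)",
    "software": [
        { "name": "Ubuntu", "version": "distribution" },
        { "name": "6.5.0", "version": "kernel" }
    ],
    "userName": "Ubuntu",
    "manualRun": false
}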
112
xtask/src/bench/meili_process.rs
Normal file
@@ -0,0 +1,112 @@
use std::collections::BTreeMap;

use anyhow::{bail, Context as _};

use super::assets::Asset;
use super::client::Client;
use super::workload::Workload;

pub async fn kill(mut meilisearch: tokio::process::Child) {
    if let Err(error) = meilisearch.kill().await {
        tracing::warn!(
            error = &error as &dyn std::error::Error,
            "while terminating Meilisearch server"
        )
    }
}

#[tracing::instrument]
pub async fn build() -> anyhow::Result<()> {
    let mut command = tokio::process::Command::new("cargo");
    command.arg("build").arg("--release").arg("-p").arg("meilisearch");

    command.kill_on_drop(true);

    let mut builder = command.spawn().context("error building Meilisearch")?;

    if !builder.wait().await.context("could not build Meilisearch")?.success() {
        bail!("failed building Meilisearch")
    }

    Ok(())
}

#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
pub async fn start(
    client: &Client,
    master_key: Option<&str>,
    workload: &Workload,
    asset_folder: &str,
) -> anyhow::Result<tokio::process::Child> {
    let mut command = tokio::process::Command::new("cargo");
    command
        .arg("run")
        .arg("--release")
        .arg("-p")
        .arg("meilisearch")
        .arg("--bin")
        .arg("meilisearch")
        .arg("--");

    command.arg("--db-path").arg("./_xtask_benchmark.ms");
    if let Some(master_key) = master_key {
        command.arg("--master-key").arg(master_key);
    }
    command.arg("--experimental-enable-logs-route");

    for extra_arg in workload.extra_cli_args.iter() {
        command.arg(extra_arg);
    }

    command.kill_on_drop(true);

    let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;

    wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;

    Ok(meilisearch)
}

async fn wait_for_health(
    client: &Client,
    meilisearch: &mut tokio::process::Child,
    assets: &BTreeMap<String, Asset>,
    asset_folder: &str,
) -> anyhow::Result<()> {
    // up to 100 attempts spaced 500ms apart: Meilisearch gets roughly 50s to come up
    for i in 0..100 {
        let res =
            super::command::run(client.clone(), health_command(), assets, asset_folder).await;
        if res.is_ok() {
            // check that it is actually the current Meilisearch instance that answered us
            if let Some(exit_code) =
                meilisearch.try_wait().context("cannot check Meilisearch server process status")?
            {
                tracing::error!("got a health response from a different process");
                bail!("Meilisearch server exited early with code {exit_code}");
            }

            return Ok(());
        }
        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
        // check whether the Meilisearch instance exited early (cut the wait)
        if let Some(exit_code) =
            meilisearch.try_wait().context("cannot check Meilisearch server process status")?
        {
            bail!("Meilisearch server exited early with code {exit_code}");
        }
        tracing::debug!(attempt = i, "Waiting for Meilisearch to go up");
    }
    bail!("Meilisearch is not responding")
}

fn health_command() -> super::command::Command {
    super::command::Command {
        route: "/health".into(),
        method: super::client::Method::Get,
        body: Default::default(),
        synchronous: super::command::SyncMode::WaitForResponse,
    }
}

pub fn delete_db() {
    let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
}
203
xtask/src/bench/mod.rs
Normal file
@@ -0,0 +1,203 @@
mod assets;
mod client;
mod command;
mod dashboard;
mod env_info;
mod meili_process;
mod workload;

use std::path::PathBuf;

use anyhow::Context;
use clap::Parser;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;

use self::client::Client;
use self::workload::Workload;

pub fn default_http_addr() -> String {
    "127.0.0.1:7700".to_string()
}
pub fn default_report_folder() -> String {
    "./bench/reports/".into()
}

pub fn default_asset_folder() -> String {
    "./bench/assets/".into()
}

pub fn default_log_filter() -> String {
    "info".into()
}

pub fn default_dashboard_url() -> String {
    "http://localhost:9001".into()
}

/// Run benchmarks from a workload
#[derive(Parser, Debug)]
pub struct BenchDeriveArgs {
    /// Filename of the workload file; pass multiple filenames
    /// to run multiple workloads in the specified order.
    ///
    /// Each workload run will get its own report file.
    #[arg(value_name = "WORKLOAD_FILE", last = false)]
    workload_file: Vec<PathBuf>,

    /// URL of the dashboard.
    #[arg(long, default_value_t = default_dashboard_url())]
    dashboard_url: String,

    /// Directory to output reports.
    #[arg(long, default_value_t = default_report_folder())]
    report_folder: String,

    /// Directory to store the remote assets.
    #[arg(long, default_value_t = default_asset_folder())]
    asset_folder: String,

    /// Log directives
    #[arg(short, long, default_value_t = default_log_filter())]
    log_filter: String,

    /// Benchmark dashboard API key
    #[arg(long)]
    api_key: Option<String>,

    /// Meilisearch master key
    #[arg(long)]
    master_key: Option<String>,

    /// Authentication bearer for fetching assets
    #[arg(long)]
    assets_key: Option<String>,

    /// Reason for the benchmark invocation
    #[arg(short, long)]
    reason: Option<String>,
}

pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
    // setup logs
    let filter: tracing_subscriber::filter::Targets =
        args.log_filter.parse().context("invalid --log-filter")?;

    let subscriber = tracing_subscriber::registry().with(
        tracing_subscriber::fmt::layer()
            .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
            .with_filter(filter),
    );
    tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;

    // fetch environment and build info
    let env = env_info::Environment::generate_from_current_config();
    let build_info = build_info::BuildInfo::from_build();

    // tokio runtime
    let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
    let _scope = rt.enter();

    // setup clients
    let assets_client =
        Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h

    let dashboard_client = Client::new(
        Some(format!("{}/api/v1", args.dashboard_url)),
        args.api_key.as_deref(),
        Some(std::time::Duration::from_secs(60)),
    )?;

    // reporting uses its own client because keeping the stream open to wait for entries
    // blocks any other requests.
    // Also, no timeout here: we don't know how long recovering the full trace will take.
    let logs_client = Client::new(
        Some("http://127.0.0.1:7700/logs/stream".into()),
        args.master_key.as_deref(),
        None,
    )?;

    let meili_client = Client::new(
        Some("http://127.0.0.1:7700".into()),
        args.master_key.as_deref(),
        Some(std::time::Duration::from_secs(60)),
    )?;

    // enter runtime

    rt.block_on(async {
        dashboard::send_machine_info(&dashboard_client, &env).await?;

        let commit_message =
            build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
        let max_workloads = args.workload_file.len();
        let reason: Option<&str> = args.reason.as_deref();
        let invocation_uuid = dashboard::create_invocation(
            &dashboard_client,
            build_info,
            commit_message,
            env,
            max_workloads,
            reason,
        )
        .await?;

        tracing::info!(workload_count = args.workload_file.len(), "handling workload files");

        // main task
        let workload_runs = tokio::spawn({
            let dashboard_client = dashboard_client.clone();
            async move {
                for workload_file in args.workload_file.iter() {
                    let workload: Workload = serde_json::from_reader(
                        std::fs::File::open(workload_file)
                            .with_context(|| format!("error opening {}", workload_file.display()))?,
                    )
                    .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;

                    workload::execute(
                        &assets_client,
                        &dashboard_client,
                        &logs_client,
                        &meili_client,
                        invocation_uuid,
                        args.master_key.as_deref(),
                        workload,
                        &args,
                    )
                    .await?;
                }
                Ok::<(), anyhow::Error>(())
            }
        });

        // handle ctrl-c
        let abort_handle = workload_runs.abort_handle();
        tokio::spawn({
            let dashboard_client = dashboard_client.clone();
            dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
        });

        // wait for the end of the main task, handle result
        match workload_runs.await {
            Ok(Ok(_)) => {
                tracing::info!("Success");
                Ok::<(), anyhow::Error>(())
            }
            Ok(Err(error)) => {
                tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
                dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string()))
                    .await;
                tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
                Err(error)
            }
            Err(join_error) => match join_error.try_into_panic() {
                Ok(panic) => {
                    tracing::error!("invocation panicked, attempting to report the failure to dashboard");
                    dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into()))
                        .await;
                    std::panic::resume_unwind(panic)
                }
                Err(_) => {
                    tracing::warn!("task was canceled");
                    Ok(())
                }
            },
        }
    })?;

    Ok(())
}
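Putting it together, a local invocation using these flags and defaults could look like the following (the dashboard URL shown is the default; the API key and reason string are illustrative):

cargo xtask bench \
    --dashboard-url http://localhost:9001 \
    --api-key "<dashboard key>" \
    --reason "local experiment" \
    workloads/movies.json workloads/movies-nothreads.json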
262
xtask/src/bench/workload.rs
Normal file
@@ -0,0 +1,262 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Seek as _, Write as _};

use anyhow::{bail, Context as _};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use serde_json::json;
use tokio::task::JoinHandle;
use uuid::Uuid;

use super::assets::Asset;
use super::client::Client;
use super::command::SyncMode;
use super::BenchDeriveArgs;
use crate::bench::{assets, dashboard, meili_process};

#[derive(Deserialize)]
pub struct Workload {
    pub name: String,
    pub run_count: u16,
    pub extra_cli_args: Vec<String>,
    pub assets: BTreeMap<String, Asset>,
    pub commands: Vec<super::command::Command>,
}

async fn run_commands(
    dashboard_client: &Client,
    logs_client: &Client,
    meili_client: &Client,
    workload_uuid: Uuid,
    workload: &Workload,
    args: &BenchDeriveArgs,
    run_number: u16,
) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
    let report_folder = &args.report_folder;
    let workload_name = &workload.name;

    std::fs::create_dir_all(report_folder)
        .with_context(|| format!("could not create report directory at {report_folder}"))?;

    let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
    let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");

    let report_handle = start_report(logs_client, trace_filename).await?;

    // each batch ends at the first command that is not `DontWait`: a run of
    // fire-and-forget commands is sent concurrently, then the batch synchronizes
    for batch in workload
        .commands
        .as_slice()
        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
    {
        super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
            .await?;
    }

    let processor =
        stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
            .await?;

    Ok(processor)
}

#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
pub async fn execute(
    assets_client: &Client,
    dashboard_client: &Client,
    logs_client: &Client,
    meili_client: &Client,
    invocation_uuid: Uuid,
    master_key: Option<&str>,
    workload: Workload,
    args: &BenchDeriveArgs,
) -> anyhow::Result<()> {
    assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;

    let workload_uuid =
        dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;

    let mut tasks = Vec::new();

    for i in 0..workload.run_count {
        tasks.push(
            execute_run(
                dashboard_client,
                logs_client,
                meili_client,
                workload_uuid,
                master_key,
                &workload,
                args,
                i,
            )
            .await?,
        );
    }

    let mut reports = Vec::with_capacity(workload.run_count as usize);

    for task in tasks {
        reports.push(
            task.await
                .context("task panicked while processing report")?
                .context("task failed while processing report")?,
        );
    }

    tracing::info!(workload = workload.name, "Successful workload");

    Ok(())
}

#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
async fn execute_run(
    dashboard_client: &Client,
    logs_client: &Client,
    meili_client: &Client,
    workload_uuid: Uuid,
    master_key: Option<&str>,
    workload: &Workload,
    args: &BenchDeriveArgs,
    run_number: u16,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
    meili_process::delete_db();

    meili_process::build().await?;
    let meilisearch =
        meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;

    let processor = run_commands(
        dashboard_client,
        logs_client,
        meili_client,
        workload_uuid,
        workload,
        args,
        run_number,
    )
    .await?;

    meili_process::kill(meilisearch).await;

    tracing::info!(run_number, "Successful run");

    Ok(processor)
}

async fn start_report(
    logs_client: &Client,
    filename: String,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
    let report_file = std::fs::File::options()
        .create(true)
        .truncate(true)
        .write(true)
        .read(true)
        .open(&filename)
        .with_context(|| format!("could not create file at {filename}"))?;
    let mut report_file = std::io::BufWriter::new(report_file);

    let response = logs_client
        .post("")
        .json(&json!({
            "mode": "profile",
            "target": "indexing::=trace"
        }))
        .send()
        .await
        .context("failed to start report")?;

    let code = response.status();
    if code.is_client_error() {
        tracing::error!(%code, "request error when trying to start report");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("response error when trying to start report")?;
        bail!(
            "request error when trying to start report: server responded with error code {code} and '{response}'"
        )
    } else if code.is_server_error() {
        tracing::error!(%code, "server error when trying to start report");
        let response: serde_json::Value = response
            .json()
            .await
            .context("could not deserialize response as JSON")
            .context("response error trying to start report")?;
        bail!("server error when trying to start report: server responded with error code {code} and '{response}'")
    }

    Ok(tokio::task::spawn(async move {
        let mut stream = response.bytes_stream();
        while let Some(bytes) = stream.try_next().await.context("while waiting for report")? {
            report_file
                .write_all(&bytes)
                .with_context(|| format!("while writing report to {filename}"))?;
        }
        report_file.into_inner().with_context(|| format!("while writing report to {filename}"))
    }))
}

async fn stop_report(
    dashboard_client: &Client,
    logs_client: &Client,
    workload_uuid: Uuid,
    filename: String,
    report_handle: tokio::task::JoinHandle<anyhow::Result<std::fs::File>>,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
    let response = logs_client.delete("").send().await.context("while stopping report")?;
    if !response.status().is_success() {
        bail!("received HTTP {} while stopping report", response.status())
    }

    let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle)
        .await
        .context("while waiting for the end of the report")?
        .context("report writing task panicked")?
        .context("while writing report")?;

    file.rewind().context("while rewinding report file")?;

    let process_handle = tokio::task::spawn({
        let dashboard_client = dashboard_client.clone();
        async move {
            let span = tracing::info_span!("processing trace to report", filename);
            let _guard = span.enter();
            let report = tracing_trace::processor::span_stats::to_call_stats(
                tracing_trace::TraceReader::new(std::io::BufReader::new(file)),
            )
            .context("could not convert trace to report")?;
            let context = || format!("writing report to {filename}");

            dashboard::create_run(dashboard_client, workload_uuid, &report).await?;

            let mut output_file = std::io::BufWriter::new(
                std::fs::File::options()
                    .create(true)
                    .truncate(true)
                    .write(true)
                    .read(true)
                    .open(&filename)
                    .with_context(context)?,
            );

            for (key, value) in report {
                serde_json::to_writer(&mut output_file, &json!({key: value}))
                    .context("serializing span stat")?;
                writeln!(&mut output_file).with_context(context)?;
            }
            output_file.flush().with_context(context)?;
            let mut output_file = output_file.into_inner().with_context(context)?;

            output_file.rewind().context("could not rewind output_file").with_context(context)?;

            Ok(output_file)
        }
    });

    Ok(process_handle)
}
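With the default `--report-folder`, a run of the movies.json workload (run_count 10) therefore produces, for each run number N in 0..10:

bench/reports/movies.json-N-trace.json    # raw stream captured from /logs/stream
bench/reports/movies.json-N-report.json   # per-span call stats, also sent to the dashboard

The trace file receives the live log stream while the run executes; stop_report then folds it into call stats and overwrites the report file with one JSON object per span.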
1
xtask/src/lib.rs
Normal file
@@ -0,0 +1 @@
pub mod bench;
xtask/src/main.rs
@@ -1,6 +1,7 @@
 use std::collections::HashSet;

 use clap::Parser;
+use xtask::bench::BenchDeriveArgs;

 /// List features available in the workspace
 #[derive(Parser, Debug)]
@@ -17,13 +18,16 @@ struct ListFeaturesDeriveArgs {
 #[command(bin_name = "cargo xtask")]
 enum Command {
     ListFeatures(ListFeaturesDeriveArgs),
+    Bench(BenchDeriveArgs),
 }

-fn main() {
+fn main() -> anyhow::Result<()> {
     let args = Command::parse();
     match args {
         Command::ListFeatures(args) => list_features(args),
+        Command::Bench(args) => xtask::bench::run(args)?,
     }
+    Ok(())
 }

 fn list_features(args: ListFeaturesDeriveArgs) {