mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-19 13:00:46 +00:00
Compare commits
505 Commits
prototype-
...
embedding-
Author | SHA1 | Date | |
---|---|---|---|
3aef2c9e42 | |||
53dbb790bb | |||
556a9ce9dc | |||
b9e4c6b8c2 | |||
fd8b2451d7 | |||
058f9ffda5 | |||
5d363205a5 | |||
a683faa882 | |||
8887cbdcd5 | |||
634865ff53 | |||
36fccf8525 | |||
d6bd60d569 | |||
48ad959fc1 | |||
1bc30cb4c8 | |||
77138a42d6 | |||
0791506124 | |||
2a015ac3b8 | |||
6f248b78a9 | |||
d694e312ff | |||
d76dcc8998 | |||
e654f66223 | |||
34f2ab7093 | |||
1a9dbd364e | |||
662c5d9871 | |||
5cd61b50f9 | |||
9a9be76757 | |||
cfa6ba6c3b | |||
f4f333dbf6 | |||
1ade76ba10 | |||
ae26658913 | |||
aa09edb3fb | |||
3f42f1a036 | |||
9bdfdd395b | |||
78d0625a91 | |||
3f655ea20e | |||
50bc1d55f3 | |||
0a4f2ef891 | |||
faa1f7c5b7 | |||
3cc5d86598 | |||
1ae47bec77 | |||
2f1be0ff86 | |||
9cee432255 | |||
ff8d48d2f1 | |||
a56c036994 | |||
511c48f520 | |||
4623691d1f | |||
3261aadcf2 | |||
073e9f2967 | |||
5f8f48ec95 | |||
ed2fe365a0 | |||
f7c8a77f89 | |||
a8030850ee | |||
132065afda | |||
51c298662b | |||
70a860a0f0 | |||
a3254d7d7d | |||
73c9c1ebdc | |||
4c7a6e5c1b | |||
ef4c87accf | |||
ced7ea4a5c | |||
fa3990daf9 | |||
c5993196b3 | |||
16234e1313 | |||
be9f4f96df | |||
b274106ad3 | |||
48527761e7 | |||
6792d048b8 | |||
07bfed99e6 | |||
8dfded2993 | |||
3714f16696 | |||
d0cd3cacec | |||
fef089c7b6 | |||
d47e1e15de | |||
caccb51814 | |||
a76a3e8f11 | |||
32dede35c7 | |||
6397ef12a0 | |||
cf9b311f71 | |||
7423243be0 | |||
b5e41f0e46 | |||
5690700601 | |||
2faad504c6 | |||
2bcd69750f | |||
9f0d33ec99 | |||
de24e75be8 | |||
a3af9fe057 | |||
90683d0e4e | |||
5c79273748 | |||
90e6b6416f | |||
2b75072b09 | |||
6e6fd077d4 | |||
b45eea0d3e | |||
a051ab3d9a | |||
0b89ef1fd7 | |||
65ba7b47af | |||
8af76a65bf | |||
6b94033c97 | |||
dfe0c8664e | |||
0ca652de28 | |||
87f105747f | |||
735634e998 | |||
3740755d9c | |||
bbcabc47bd | |||
a06cb1bfd6 | |||
549dc985b8 | |||
428463e45c | |||
7113fcf63a | |||
aa6855cd4f | |||
895db76a51 | |||
a88146d59e | |||
91e77abf4f | |||
f60814b319 | |||
5a675bcb82 | |||
82a796aea7 | |||
f6287602e9 | |||
ede456c5b0 | |||
3f5b5df139 | |||
d72e5f5f69 | |||
aa366d593d | |||
205430854d | |||
be64006211 | |||
eda309d562 | |||
119d618a76 | |||
2b2e6c0b3a | |||
e6329e77e1 | |||
b086c51a23 | |||
9ce5598fef | |||
e30c24b5bf | |||
c1a132fa06 | |||
e54fc59248 | |||
11e7c0d75f | |||
c593fbe648 | |||
2b3327ea74 | |||
d14184f4da | |||
46bceb91f1 | |||
cab5e35ff7 | |||
f8232976ed | |||
22d363c05a | |||
41620d5325 | |||
f3d5c74c02 | |||
d48baece51 | |||
c45ede44a8 | |||
4235a82dcf | |||
e7b9b8f002 | |||
5716ab70f3 | |||
422a786ffd | |||
836ae19bec | |||
0b5bc41b79 | |||
b45059e8f2 | |||
c16c60b599 | |||
0114796d2a | |||
17a94c40dc | |||
76ca44b214 | |||
d2e4d6dd8a | |||
879cf85037 | |||
c2d5b20a42 | |||
600178c5ab | |||
b93ca3945e | |||
8fef48f8ca | |||
dedae94102 | |||
7ae9a4afee | |||
d2776efb11 | |||
9211e94c4f | |||
b7bebe9bbb | |||
37a692f942 | |||
25c19a306b | |||
c078efd730 | |||
9dac91efe0 | |||
074d509d92 | |||
d439a3cb9d | |||
e92b6beb20 | |||
27cc357362 | |||
73dfeefc7c | |||
d85480de89 | |||
9f55708d84 | |||
280c3907be | |||
8419fd9b3b | |||
283944ea89 | |||
8aacd6374a | |||
8326f34ad1 | |||
259fc067d3 | |||
e8b2bb3ea6 | |||
7dfb2071b5 | |||
9cfbef478e | |||
efd5fd96cc | |||
f4a908669c | |||
eb2c2815b6 | |||
0ef52941c7 | |||
0d85f8fcee | |||
f4bb6cbca8 | |||
ad03c86c44 | |||
85037352b9 | |||
29e9c74a49 | |||
1b54c866e1 | |||
e414284335 | |||
7a204609fe | |||
f6803dd7d1 | |||
f86f4f619f | |||
e35d58b531 | |||
63827bbee0 | |||
6b2b8ed676 | |||
6db5939f84 | |||
d35b2d8d33 | |||
0687cf058a | |||
340d9e6edc | |||
7219299436 | |||
657bbf5d1e | |||
28adbc0d18 | |||
e3fba62e13 | |||
fb9170b8e3 | |||
c15763f910 | |||
7fa1c41190 | |||
77802dabf6 | |||
a685eeafeb | |||
f16e6f7c37 | |||
900be0ccad | |||
51a087b764 | |||
31142b3663 | |||
e60b855a54 | |||
510a4b91be | |||
e704f4d1ec | |||
82fe80b360 | |||
0f1dd3614c | |||
3aa6c3c750 | |||
b956918c11 | |||
e3003c1609 | |||
bf13268649 | |||
0bb7866f1e | |||
e6e9a033aa | |||
63031219c5 | |||
44d6430bae | |||
4d26e9c6f2 | |||
2ff382c023 | |||
0f6dd133b2 | |||
29f6eeff8f | |||
ef007d547d | |||
3fc16c627d | |||
9422b6d654 | |||
ddba52414a | |||
4534dc2cab | |||
b05cb80803 | |||
6e0526090a | |||
a743da3061 | |||
c6216517c7 | |||
2d4f7c635e | |||
ee812b31c4 | |||
3329248a84 | |||
bc08cd0deb | |||
3e2f468213 | |||
7c448bcc00 | |||
acb7c0a449 | |||
e8795d2608 | |||
e023ee4b6b | |||
e74c3b692a | |||
1d3b18f774 | |||
00bc86e74b | |||
adc9976615 | |||
2090e9ea31 | |||
1c8f1c18f4 | |||
ae8c1461e1 | |||
5f62274f21 | |||
c4a96b40eb | |||
5f50fc9464 | |||
89498a2bea | |||
211c1b753f | |||
d08e89ea3d | |||
695877043a | |||
bc4d1530ee | |||
d7721fe607 | |||
4a179fb3c0 | |||
59a1c5d9a7 | |||
2f82d94502 | |||
bd2bd0f33b | |||
e02733df4a | |||
f373ecc96a | |||
748a327271 | |||
4925b30196 | |||
43c4a229b7 | |||
ca112a8b95 | |||
855fa555a3 | |||
a237c0797a | |||
5c46dc702a | |||
4cadc8113b | |||
2d6dc83940 | |||
ab768f379f | |||
705e9a9e5e | |||
c17031d3de | |||
67f2a30d7c | |||
99732f4084 | |||
5081d837ea | |||
9e1cb792f4 | |||
b6b7ede266 | |||
f50e586a4f | |||
11fedea788 | |||
032b34c377 | |||
b421c8e7de | |||
00eb258a53 | |||
fc6cc80705 | |||
138d20b277 | |||
7c1a9113f9 | |||
07ae297ffd | |||
4069dbcfca | |||
03eb50fbac | |||
2616d776f2 | |||
3004db95af | |||
9a729bf31d | |||
8bfa6a7f54 | |||
056f18bd02 | |||
fe9866aca8 | |||
60f105a4a3 | |||
abb399b802 | |||
aeaac7270e | |||
f45770a3ce | |||
0e10ff1aa3 | |||
6ee608c2d1 | |||
95e8a9bef1 | |||
0598320252 | |||
2269104337 | |||
e8774ad079 | |||
c3368e6859 | |||
9bda9a9a64 | |||
aefebdeb8b | |||
646e44ddf9 | |||
b8845d1015 | |||
620867d611 | |||
a73d3c03e9 | |||
824f5b12ce | |||
bb4baf7fae | |||
0263eb0aec | |||
8a916a4e42 | |||
6a683975bf | |||
1824fbd1b5 | |||
34d8a54c4b | |||
8fa6e8670a | |||
170ad87e44 | |||
8f96724adf | |||
01e5b0effa | |||
2ec9664878 | |||
10028515ac | |||
63ccd19ab1 | |||
1b4d344e18 | |||
89c0cf9b12 | |||
3770e70581 | |||
e497008161 | |||
a15ebb283f | |||
3f256a7959 | |||
b41af0d0f6 | |||
3ebff65ef3 | |||
666680bd87 | |||
27527849bb | |||
1d02efeab9 | |||
53fc98d3b0 | |||
263300b3a3 | |||
ab3d92d163 | |||
ef9fc6c854 | |||
61b0f50d4d | |||
0557a4dd2f | |||
930d5a09a8 | |||
8b0c4291ae | |||
c9efdf8c88 | |||
72736c0ea9 | |||
49317bbee4 | |||
af54c8381e | |||
693fcd5752 | |||
733175359a | |||
7c6162f0bf | |||
d6ae39bf0f | |||
e416bbc1de | |||
2cfd363dc6 | |||
70aa78a2c2 | |||
96c81762ed | |||
0b1f634afa | |||
d3d5015854 | |||
f95f29c492 | |||
a50b69b868 | |||
3668f5f021 | |||
54fdf379bb | |||
41b1cd5a73 | |||
5c14a25d5a | |||
fda2843135 | |||
9347330f3a | |||
56c9190dab | |||
6b986dceaf | |||
ea6bb4df1d | |||
a3d2f64725 | |||
d5526cffff | |||
5cb75d1f2a | |||
921e3c4ffe | |||
52591761af | |||
f80182f0a9 | |||
3b30b6a57a | |||
5efc78db55 | |||
cffbe3fcb6 | |||
8d8fcb9846 | |||
20049669c9 | |||
db28d13cb1 | |||
5a7cfc57fd | |||
790621dc29 | |||
1d577ae98b | |||
88e9a55d44 | |||
dbe551cf99 | |||
a299fbd33b | |||
193119acb9 | |||
4c71118699 | |||
5fe2943d3c | |||
86ff502327 | |||
6b1a345dce | |||
b54ece690b | |||
3ea167bade | |||
1158d6689f | |||
d9b0463a0b | |||
ae9899f179 | |||
308fd7128e | |||
27e7c00622 | |||
58207da934 | |||
fb8b832192 | |||
17207b5405 | |||
bd95503eba | |||
8b8b0d802c | |||
d329e86250 | |||
d416b3b390 | |||
54f5e74744 | |||
fd4b192a39 | |||
3c13feebf7 | |||
1811168b96 | |||
b06cc1e0a2 | |||
44f812c36d | |||
c8e77b5f25 | |||
283f516e15 | |||
b4ca0a8c98 | |||
b658e38acd | |||
f87e46cc16 | |||
65354b414a | |||
025df397c0 | |||
f77abc9dc8 | |||
7e9909ee45 | |||
43ec97fe45 | |||
02929e241b | |||
c13efde042 | |||
36f0a1492c | |||
ce65ad213b | |||
3e0de6cb83 | |||
f3d691667d | |||
ce9c930d10 | |||
fc88b003b4 | |||
cf5d26124a | |||
38b1c57fa8 | |||
25c525b057 | |||
83cd28b60b | |||
48cad4132a | |||
4897ad99d0 | |||
46ff78b4ec | |||
9ad43b6841 | |||
c9ec502ed9 | |||
18aed75d3b | |||
6738a4f6ee | |||
d2948adea3 | |||
f54b57e5be | |||
95821d0bde | |||
f690fa0686 | |||
24e94b28c1 | |||
34d58f35c8 | |||
1d5265caf4 | |||
97aeb6db4d | |||
f888f87635 | |||
8c8d98eeaa | |||
c5ae43cac6 | |||
57eecd6197 | |||
2fe5c78cb6 | |||
8047cfe438 | |||
5717e5c1af | |||
bb07038c31 | |||
d1a088ea0b | |||
b68e22c0e6 | |||
03a36f116e | |||
8a0bf24ed5 | |||
e2763471e5 | |||
b2f2c5d69f | |||
1594c54e23 | |||
13b607bd68 | |||
3d130d31c8 | |||
4cda584b0c | |||
248c90bad5 | |||
0e9040e605 | |||
3e3c00f44c | |||
d986a3bbaf | |||
c2ceb8e41b | |||
79db2e67fb | |||
865f24cfef | |||
3fbe1df770 | |||
150d1db86b | |||
806e983aa5 | |||
e96c1d4b0f | |||
15cdc6924b | |||
677e8b122c | |||
75a7e40a27 | |||
c8939944c6 | |||
4e6252fb03 | |||
8bd8e744f3 | |||
53f32a7dd7 | |||
47a7ed93d3 | |||
2ac826edca | |||
89aff2081c | |||
3b773b3416 | |||
648b2876f6 |
10
.github/workflows/db-change-missing.yml
vendored
10
.github/workflows/db-change-missing.yml
vendored
@ -4,22 +4,22 @@ on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, labeled, unlabeled]
|
||||
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
check-labels:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Check db change labels
|
||||
id: check_labels
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
URL=/repos/meilisearch/meilisearch/pulls/${{ github.event.pull_request.number }}/labels
|
||||
echo ${{ github.event.pull_request.number }}
|
||||
echo $URL
|
||||
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/meilisearch/meilisearch/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
|
||||
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
|
||||
echo "Labels: $LABELS"
|
||||
if [[ ! "$LABELS" =~ "db change" && ! "$LABELS" =~ "no db change" ]]; then
|
||||
echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
|
||||
exit 1
|
||||
|
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@ -32,7 +32,7 @@ jobs:
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
|
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@ -51,7 +51,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
@ -81,7 +81,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
@ -113,7 +113,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
@ -178,7 +178,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
|
10
.github/workflows/test-suite.yml
vendored
10
.github/workflows/test-suite.yml
vendored
@ -29,7 +29,7 @@ jobs:
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -51,7 +51,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
@ -155,7 +155,7 @@ jobs:
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -172,7 +172,7 @@ jobs:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -191,7 +191,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
11
.gitignore
vendored
11
.gitignore
vendored
@ -11,12 +11,21 @@
|
||||
/bench
|
||||
/_xtask_benchmark.ms
|
||||
/benchmarks
|
||||
.DS_Store
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
*.full.snap
|
||||
## ... unreviewed
|
||||
## ... unreviewed
|
||||
*.snap.new
|
||||
## ... pending
|
||||
*.pending-snap
|
||||
|
||||
# Tmp files
|
||||
.tmp*
|
||||
|
||||
# Database snapshot
|
||||
crates/meilisearch/db.snapshot
|
||||
|
||||
# Fuzzcheck data for the facet indexing fuzz test
|
||||
crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
|
||||
|
@ -57,9 +57,17 @@ This command will be triggered to each PR as a requirement for merging it.
|
||||
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
|
||||
It'll store some built artifacts in the directory of your choice.
|
||||
|
||||
We recommend using the standard `$HOME/.cache/lindera` directory:
|
||||
We recommend using the `$HOME/.cache/meili/lindera` directory:
|
||||
```sh
|
||||
export LINDERA_CACHE=$HOME/.cache/lindera
|
||||
export LINDERA_CACHE=$HOME/.cache/meili/lindera
|
||||
```
|
||||
|
||||
You can set the `MILLI_BENCH_DATASETS_PATH` environment variable to further speed up your builds.
|
||||
It'll store some big files used for the benchmarks in the directory of your choice.
|
||||
|
||||
We recommend using the `$HOME/.cache/meili/benches` directory:
|
||||
```sh
|
||||
export MILLI_BENCH_DATASETS_PATH=$HOME/.cache/meili/benches
|
||||
```
|
||||
|
||||
Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.
|
||||
|
668
Cargo.lock
generated
668
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.15.2"
|
||||
version = "1.16.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.10"
|
||||
roaring = "0.10.12"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bytes = "1.10.1"
|
||||
convert_case = "0.8.0"
|
||||
flate2 = "1.1.2"
|
||||
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
@ -51,3 +51,8 @@ harness = false
|
||||
[[bench]]
|
||||
name = "indexing"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "sort"
|
||||
harness = false
|
||||
|
||||
|
@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::{FilterableAttributesRule, Index};
|
||||
use rand::seq::SliceRandom;
|
||||
use rand_chacha::rand_core::SeedableRng;
|
||||
@ -65,7 +65,7 @@ fn setup_settings<'t>(
|
||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
}
|
||||
|
||||
fn setup_index_with_settings(
|
||||
@ -166,9 +166,10 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -232,9 +233,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -276,9 +278,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -344,9 +347,10 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -420,9 +424,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -464,9 +469,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -504,9 +510,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -571,9 +578,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -637,9 +645,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -703,9 +712,10 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -768,9 +778,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -812,9 +823,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -879,9 +891,10 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -955,9 +968,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1000,9 +1014,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1041,9 +1056,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1107,9 +1123,10 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1172,9 +1189,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1216,9 +1234,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1283,9 +1302,10 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1331,9 +1351,10 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
||||
new_fields_ids_map,
|
||||
Some(primary_key),
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1395,9 +1416,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1439,9 +1461,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1479,9 +1502,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1568,9 +1592,10 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1658,9 +1683,10 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1740,9 +1766,10 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1806,9 +1833,10 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1871,9 +1899,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1915,9 +1944,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -1982,9 +2012,10 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
114
crates/benchmarks/benches/sort.rs
Normal file
114
crates/benchmarks/benches/sort.rs
Normal file
@ -0,0 +1,114 @@
|
||||
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
|
||||
//!
|
||||
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
|
||||
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields =
|
||||
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let sortable_fields =
|
||||
["_geo", "name", "population", "elevation", "timezone", "modification-date"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
|
||||
dataset_format: "jsonl",
|
||||
configure: base_conf,
|
||||
primary_key: Some("geonameid"),
|
||||
queries: &[""],
|
||||
offsets: &[
|
||||
Some((0, 20)), // The most common query in the real world
|
||||
Some((0, 500)), // A query that ranges over many documents
|
||||
Some((980, 20)), // The worst query that could happen in the real world
|
||||
Some((800_000, 20)) // The worst query
|
||||
],
|
||||
get_documents: true,
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_sort(c: &mut criterion::Criterion) {
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
utils::Conf {
|
||||
group_name: "without sort",
|
||||
sort: None,
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many different values",
|
||||
sort: Some(vec!["name:asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many similar values",
|
||||
sort: Some(vec!["timezone:desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many similar then different values",
|
||||
sort: Some(vec!["timezone:desc", "name:asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many different then similar values",
|
||||
sort: Some(vec!["timezone:desc", "name:asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "geo sort",
|
||||
sample_size: Some(10),
|
||||
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many similar values then geo sort",
|
||||
sample_size: Some(50),
|
||||
sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many different values then geo sort",
|
||||
sample_size: Some(50),
|
||||
sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
utils::Conf {
|
||||
group_name: "sort on many fields",
|
||||
sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_sort);
|
||||
criterion_main!(benches);
|
@ -9,11 +9,12 @@ use anyhow::Context;
|
||||
use bumpalo::Bump;
|
||||
use criterion::BenchmarkId;
|
||||
use memmap2::Mmap;
|
||||
use milli::documents::sort::recursive_sort;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
|
||||
@ -35,6 +36,12 @@ pub struct Conf<'a> {
|
||||
pub configure: fn(&mut Settings),
|
||||
pub filter: Option<&'a str>,
|
||||
pub sort: Option<Vec<&'a str>>,
|
||||
/// set to skip documents (offset, limit)
|
||||
pub offsets: &'a [Option<(usize, usize)>],
|
||||
/// enable if you want to bench getting documents without querying
|
||||
pub get_documents: bool,
|
||||
/// configure the benchmark sample size
|
||||
pub sample_size: Option<usize>,
|
||||
/// enable or disable the optional words on the query
|
||||
pub optional_words: bool,
|
||||
/// primary key, if there is None we'll auto-generate docids for every documents
|
||||
@ -52,6 +59,9 @@ impl Conf<'_> {
|
||||
configure: |_| (),
|
||||
filter: None,
|
||||
sort: None,
|
||||
offsets: &[None],
|
||||
get_documents: false,
|
||||
sample_size: None,
|
||||
optional_words: true,
|
||||
primary_key: None,
|
||||
};
|
||||
@ -90,7 +100,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
|
||||
(conf.configure)(&mut builder);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
@ -125,9 +135,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@ -144,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
|
||||
let name = format!("{}: {}", file_name, conf.group_name);
|
||||
let mut group = c.benchmark_group(&name);
|
||||
if let Some(sample_size) = conf.sample_size {
|
||||
group.sample_size(sample_size);
|
||||
}
|
||||
|
||||
for &query in conf.queries {
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
if let Some(filter) = conf.filter {
|
||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||
search.filter(filter);
|
||||
}
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
});
|
||||
for offset in conf.offsets {
|
||||
let parameter = match offset {
|
||||
None => query.to_string(),
|
||||
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
|
||||
};
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(parameter),
|
||||
&query,
|
||||
|b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search
|
||||
.query(query)
|
||||
.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
if let Some(filter) = conf.filter {
|
||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||
search.filter(filter);
|
||||
}
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
if let Some((offset, limit)) = offset {
|
||||
search.offset(*offset).limit(*limit);
|
||||
}
|
||||
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if conf.get_documents {
|
||||
for offset in conf.offsets {
|
||||
let parameter = match offset {
|
||||
None => String::from("get_documents"),
|
||||
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
|
||||
};
|
||||
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||
let facet_sort =
|
||||
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
|
||||
let iter = facet_sort.iter().unwrap();
|
||||
if let Some((offset, limit)) = offset {
|
||||
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||
} else {
|
||||
let _results = iter.collect::<Vec<_>>();
|
||||
}
|
||||
} else {
|
||||
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||
if let Some((offset, limit)) = offset {
|
||||
let _results =
|
||||
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||
} else {
|
||||
let _results = all_docs.iter().collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
|
@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
|
||||
writeln!(
|
||||
&mut manifest_paths_file,
|
||||
r#"pub const {}: &str = {:?};"#,
|
||||
dataset.to_case(Case::ScreamingSnake),
|
||||
dataset.to_case(Case::UpperSnake),
|
||||
out_file.display(),
|
||||
)?;
|
||||
|
||||
|
@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
time = { version = "0.3.41", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
anyhow = "1.0.98"
|
||||
vergen-git2 = "1.0.7"
|
||||
|
@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
anyhow = "1.0.98"
|
||||
flate2 = "1.1.2"
|
||||
http = "1.3.1"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.20.2"
|
||||
once_cell = "1.21.3"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@ -1,12 +1,17 @@
|
||||
#![allow(clippy::type_complexity)]
|
||||
#![allow(clippy::wrong_self_convention)]
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::byte_unit::Byte;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
|
||||
use meilisearch_types::tasks::{
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
|
||||
};
|
||||
use meilisearch_types::InstanceUid;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -141,6 +146,12 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<String, ExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
@ -213,6 +224,15 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size,
|
||||
indexes: indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| (pattern.to_string(), settings))
|
||||
.collect(),
|
||||
},
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
@ -329,6 +349,7 @@ pub(crate) mod test {
|
||||
write_channel_congestion: None,
|
||||
internal_database_sizes: Default::default(),
|
||||
},
|
||||
embedder_stats: Default::default(),
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
|
@ -116,6 +116,15 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chat_completions_settings(
|
||||
&mut self,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => current.chat_completions_settings(),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.features()),
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
@ -21,6 +22,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
@ -192,6 +194,34 @@ impl V6Reader {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn chat_completions_settings(
|
||||
&mut self,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<(String, ChatCompletionSettings)>> + '_>> {
|
||||
let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) {
|
||||
Ok(entries) => entries,
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
Ok(Box::new(
|
||||
entries
|
||||
.map(|entry| -> Result<Option<_>> {
|
||||
let entry = entry?;
|
||||
let file_name = entry.file_name();
|
||||
let path = Path::new(&file_name);
|
||||
if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json"))
|
||||
{
|
||||
let name = path.file_stem().unwrap().to_str().unwrap().to_string();
|
||||
let file = File::open(entry.path())?;
|
||||
let settings = serde_json::from_reader(file)?;
|
||||
Ok(Some((name, settings)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
.filter_map(|entry| entry.transpose()),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use std::path::PathBuf;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@ -51,6 +51,10 @@ impl DumpWriter {
|
||||
KeyWriter::new(self.dir.path().to_path_buf())
|
||||
}
|
||||
|
||||
pub fn create_chat_completions_settings(&self) -> Result<ChatCompletionsSettingsWriter> {
|
||||
ChatCompletionsSettingsWriter::new(self.dir.path().join("chat-completions-settings"))
|
||||
}
|
||||
|
||||
pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
@ -104,6 +108,24 @@ impl KeyWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ChatCompletionsSettingsWriter {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl ChatCompletionsSettingsWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
Ok(ChatCompletionsSettingsWriter { path })
|
||||
}
|
||||
|
||||
pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> {
|
||||
let mut settings_file = File::create(self.path.join(name).with_extension("json"))?;
|
||||
serde_json::to_writer(&mut settings_file, &settings)?;
|
||||
settings_file.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TaskWriter {
|
||||
queue: BufWriter<File>,
|
||||
update_files: PathBuf,
|
||||
|
@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
@ -14,7 +14,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
nom = "7.1.3"
|
||||
nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
unescaper = "0.1.6"
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
|
@ -16,7 +16,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
@ -12,11 +12,11 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
either = "1.15.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
|
@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
@ -89,7 +89,7 @@ fn main() {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let embedders = RuntimeEmbedders::default();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
@ -144,6 +144,7 @@ fn main() {
|
||||
embedders,
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -11,31 +11,31 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
anyhow = "1.0.98"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bumpalo = "3.16.0"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
indexmap = "2.7.0"
|
||||
flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
@ -43,7 +43,8 @@ time = { version = "0.3.37", features = [
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@ -4,6 +4,7 @@ use std::io;
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
@ -211,6 +212,23 @@ impl<'a> Dump<'a> {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::Export { url, api_key, payload_size, indexes } => {
|
||||
KindWithContent::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size,
|
||||
indexes: indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| {
|
||||
Ok((
|
||||
IndexUidPattern::try_from(pattern)
|
||||
.map_err(|_| Error::CorruptedDump)?,
|
||||
settings,
|
||||
))
|
||||
})
|
||||
.collect::<Result<_, Error>>()?,
|
||||
}
|
||||
}
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
@ -151,6 +151,10 @@ pub enum Error {
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
Export(Box<Self>),
|
||||
#[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")]
|
||||
FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String },
|
||||
#[error("Failed to rollback for index `{index}`: {rollback_outcome} ")]
|
||||
RollbackFailed { index: String, rollback_outcome: RollbackOutcome },
|
||||
#[error(transparent)]
|
||||
@ -212,6 +216,7 @@ impl Error {
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::FromRemoteWhenExporting { .. }
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
@ -221,6 +226,7 @@ impl Error {
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
| Error::FeatureNotEnabled(_)
|
||||
| Error::Export(_)
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
@ -282,6 +288,7 @@ impl ErrorCode for Error {
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::FromRemoteWhenExporting { .. } => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
@ -294,6 +301,7 @@ impl ErrorCode for Error {
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::Export(_) => Code::Internal,
|
||||
Error::RollbackFailed { .. } => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::IndexSchedulerVersionMismatch { .. } => Code::Internal,
|
||||
|
@ -144,6 +144,19 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.multimodal {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "multimodal",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/846",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
|
@ -289,6 +289,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::Export { url, api_key, payload_size, indexes } => {
|
||||
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
@ -343,6 +346,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
uid,
|
||||
details,
|
||||
stats,
|
||||
embedder_stats,
|
||||
started_at,
|
||||
finished_at,
|
||||
progress: _,
|
||||
@ -366,6 +370,12 @@ pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
if !embedder_stats.skip_serializing() {
|
||||
snap.push_str(&format!(
|
||||
"embedder stats: {}, ",
|
||||
serde_json::to_string(&embedder_stats).unwrap()
|
||||
));
|
||||
}
|
||||
snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap()));
|
||||
snap.push('}');
|
||||
snap
|
||||
|
@ -57,12 +57,15 @@ use meilisearch_types::features::{
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||
use meilisearch_types::milli::vector::json_template::JsonTemplate;
|
||||
use meilisearch_types::milli::vector::{
|
||||
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Index};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{KindWithContent, Task};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use processing::ProcessingTasks;
|
||||
pub use queue::Query;
|
||||
use queue::Queue;
|
||||
@ -851,29 +854,42 @@ impl IndexScheduler {
|
||||
&self,
|
||||
index_uid: String,
|
||||
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
) -> Result<RuntimeEmbedders> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
|
||||
..
|
||||
}| {
|
||||
let prompt = Arc::new(
|
||||
prompt
|
||||
.try_into()
|
||||
.map_err(meilisearch_types::milli::Error::from)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
fragments,
|
||||
}|
|
||||
-> Result<(String, Arc<RuntimeEmbedder>)> {
|
||||
let document_template = prompt
|
||||
.try_into()
|
||||
.map_err(meilisearch_types::milli::Error::from)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
let fragments = fragments
|
||||
.into_inner()
|
||||
.into_iter()
|
||||
.map(|fragment| {
|
||||
let value = embedder_options.fragment(&fragment.name).unwrap();
|
||||
let template = JsonTemplate::new(value.clone()).unwrap();
|
||||
RuntimeFragment { name: fragment.name, id: fragment.id, template }
|
||||
})
|
||||
.collect();
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((
|
||||
name,
|
||||
(embedder.clone(), prompt, quantized.unwrap_or_default()),
|
||||
let runtime = Arc::new(RuntimeEmbedder::new(
|
||||
embedder.clone(),
|
||||
document_template,
|
||||
fragments,
|
||||
quantized.unwrap_or_default(),
|
||||
));
|
||||
|
||||
return Ok((name, runtime));
|
||||
}
|
||||
}
|
||||
|
||||
@ -889,11 +905,19 @@ impl IndexScheduler {
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||
|
||||
let runtime = Arc::new(RuntimeEmbedder::new(
|
||||
embedder.clone(),
|
||||
document_template,
|
||||
fragments,
|
||||
quantized.unwrap_or_default(),
|
||||
));
|
||||
|
||||
Ok((name, runtime))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
res.map(RuntimeEmbedders::new)
|
||||
}
|
||||
|
||||
pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {
|
||||
|
@ -103,6 +103,7 @@ make_enum_progress! {
|
||||
pub enum DumpCreationProgress {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheChatCompletionSettings,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
@ -175,8 +176,17 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum Export {
|
||||
EnsuringCorrectnessOfTheTarget,
|
||||
ExportingTheSettings,
|
||||
ExportingTheDocuments,
|
||||
}
|
||||
}
|
||||
|
||||
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Index alias AtomicIndexStep => "index" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
|
@ -179,6 +179,7 @@ impl BatchQueue {
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
embedder_stats: batch.embedder_stats.as_ref().into(),
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
|
@ -71,6 +71,7 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
|
@ -1,4 +1,5 @@
|
||||
use std::fmt;
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
@ -47,6 +48,9 @@ pub(crate) enum Batch {
|
||||
IndexSwap {
|
||||
task: Task,
|
||||
},
|
||||
Export {
|
||||
task: Task,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
@ -103,6 +107,7 @@ impl Batch {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::Export { task }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
@ -142,6 +147,7 @@ impl Batch {
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
@ -167,6 +173,7 @@ impl fmt::Display for Batch {
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::Export { .. } => f.write_str("Export")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
match index_uid {
|
||||
@ -426,9 +433,10 @@ impl IndexScheduler {
|
||||
/// 0. We get the *last* task to cancel.
|
||||
/// 1. We get the tasks to upgrade.
|
||||
/// 2. We get the *next* task to delete.
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
/// 3. We get the *next* export to process.
|
||||
/// 4. We get the *next* snapshot to process.
|
||||
/// 5. We get the *next* dump to process.
|
||||
/// 6. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
@ -500,7 +508,17 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
// 3. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
current_batch.processing([&mut task]);
|
||||
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@ -510,7 +528,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the dumps.
|
||||
// 5. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@ -523,7 +541,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
@ -577,7 +595,11 @@ impl IndexScheduler {
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = self.queue.file_store.compute_size(uuid)?;
|
||||
let content_size = match self.queue.file_store.compute_size(uuid) {
|
||||
Ok(content_size) => content_size,
|
||||
Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
|
||||
Err(otherwise) => return Err(otherwise.into()),
|
||||
};
|
||||
total_size = total_size.saturating_add(content_size);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@ mod autobatcher_test;
|
||||
mod create_batch;
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_export;
|
||||
mod process_index_operation;
|
||||
mod process_snapshot_creation;
|
||||
mod process_upgrade;
|
||||
|
@ -162,8 +162,13 @@ impl IndexScheduler {
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
|
||||
let (tasks, congestion) =
|
||||
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
|
||||
let (tasks, congestion) = self.apply_index_operation(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
op,
|
||||
&progress,
|
||||
current_batch.embedder_stats.clone(),
|
||||
)?;
|
||||
|
||||
{
|
||||
progress.update_progress(FinalizingIndexStep::Committing);
|
||||
@ -238,10 +243,12 @@ impl IndexScheduler {
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
&|| must_stop_processing.get(),
|
||||
&progress,
|
||||
current_batch.embedder_stats.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
@ -361,6 +368,46 @@ impl IndexScheduler {
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::Export { mut task } => {
|
||||
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| {
|
||||
self.process_export(
|
||||
url,
|
||||
api_key.as_deref(),
|
||||
payload_size.as_ref(),
|
||||
indexes,
|
||||
progress,
|
||||
)
|
||||
}));
|
||||
|
||||
let stats = match ret {
|
||||
Ok(Ok(stats)) => stats,
|
||||
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
|
||||
Ok(Err(e)) => return Err(Error::Export(Box::new(e))),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
if let Some(Details::Export { indexes, .. }) = task.details.as_mut() {
|
||||
*indexes = stats;
|
||||
}
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
unreachable!();
|
||||
@ -708,9 +755,11 @@ impl IndexScheduler {
|
||||
from.1,
|
||||
from.2
|
||||
);
|
||||
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
let ret = catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
self.process_rollback(from, progress)
|
||||
})) {
|
||||
}));
|
||||
|
||||
match ret {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))),
|
||||
Err(e) => {
|
||||
|
@ -43,7 +43,16 @@ impl IndexScheduler {
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2. dump the tasks
|
||||
// 2. dump the chat completion settings
|
||||
// TODO should I skip the export if the chat completion has been disabled?
|
||||
progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings);
|
||||
let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?;
|
||||
for result in self.chat_settings.iter(&rtxn)? {
|
||||
let (name, chat_settings) = result?;
|
||||
dump_chat_completion_settings.push_settings(name, &chat_settings)?;
|
||||
}
|
||||
|
||||
// 3. dump the tasks
|
||||
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
|
||||
@ -81,7 +90,7 @@ impl IndexScheduler {
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
@ -105,7 +114,7 @@ impl IndexScheduler {
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. dump the batches
|
||||
// 4. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
@ -138,7 +147,7 @@ impl IndexScheduler {
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
// 5. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
@ -165,9 +174,6 @@ impl IndexScheduler {
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let nb_documents = index
|
||||
.number_of_documents(&rtxn)
|
||||
@ -178,7 +184,7 @@ impl IndexScheduler {
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 4.1. Dump the documents
|
||||
// 5.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
@ -221,16 +227,12 @@ impl IndexScheduler {
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
regenerate,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
@ -240,7 +242,7 @@ impl IndexScheduler {
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
// 5.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
@ -251,7 +253,7 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 5. Dump experimental feature settings
|
||||
// 6. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
|
367
crates/index-scheduler/src/scheduler/process_export.rs
Normal file
367
crates/index-scheduler/src/scheduler/process_export.rs
Normal file
@ -0,0 +1,367 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{self, Write as _};
|
||||
use std::sync::atomic;
|
||||
use std::time::Duration;
|
||||
|
||||
use backoff::ExponentialBackoff;
|
||||
use byte_unit::Byte;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::update::{request_threads, Setting};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
|
||||
use meilisearch_types::settings::{self, SecretPolicy};
|
||||
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
|
||||
use serde::Deserialize;
|
||||
use ureq::{json, Response};
|
||||
|
||||
use super::MustStopProcessing;
|
||||
use crate::processing::AtomicDocumentStep;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_export(
|
||||
&self,
|
||||
base_url: &str,
|
||||
api_key: Option<&str>,
|
||||
payload_size: Option<&Byte>,
|
||||
indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
|
||||
progress: Progress,
|
||||
) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?;
|
||||
|
||||
let indexes: Vec<_> = self
|
||||
.index_names()?
|
||||
.into_iter()
|
||||
.flat_map(|uid| {
|
||||
indexes
|
||||
.iter()
|
||||
.find(|(pattern, _)| pattern.matches_str(&uid))
|
||||
.map(|(pattern, settings)| (pattern, uid, settings))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut output = BTreeMap::new();
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
|
||||
if must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
progress.update_progress(VariableNameStep::<ExportIndex>::new(
|
||||
format!("Exporting index `{uid}`"),
|
||||
i as u32,
|
||||
indexes.len() as u32,
|
||||
));
|
||||
|
||||
let ExportIndexSettings { filter, override_settings } = export_settings;
|
||||
let index = self.index(uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
|
||||
// First, check if the index already exists
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
let response = retry(&must_stop_processing, || {
|
||||
let mut request = agent.get(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
let primary_key = index
|
||||
.primary_key(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Create the index
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Patch the index primary key
|
||||
if index_exists && *override_settings {
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
let index_param = json!({ "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Send the index settings
|
||||
if !index_exists || *override_settings {
|
||||
let mut settings =
|
||||
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!("{base_url}/indexes/{uid}/settings");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_json(settings.clone()).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
let filter = filter
|
||||
.as_ref()
|
||||
.map(Filter::from_json)
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
.flatten();
|
||||
|
||||
let filter_universe = filter
|
||||
.map(|f| f.evaluate(&index_rtxn, &index))
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
let whole_universe = index
|
||||
.documents_ids(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
let universe = filter_universe.unwrap_or(whole_universe);
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// We don't need to keep this one alive as we will
|
||||
// spawn many threads to process the documents
|
||||
drop(index_rtxn);
|
||||
|
||||
let total_documents = universe.len() as u32;
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
progress.update_progress(progress_step);
|
||||
|
||||
output.insert(
|
||||
IndexUidPattern::new_unchecked(uid.clone()),
|
||||
DetailsExportIndexSettings {
|
||||
settings: (*export_settings).clone(),
|
||||
matched_documents: Some(total_documents as u64),
|
||||
},
|
||||
);
|
||||
|
||||
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
|
||||
let documents_url = format!("{base_url}/indexes/{uid}/documents");
|
||||
|
||||
let results = request_threads()
|
||||
.broadcast(|ctx| {
|
||||
let index_rtxn = index
|
||||
.read_txn()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in universe.iter().enumerate() {
|
||||
if i % ctx.num_threads() != ctx.index() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let document = index
|
||||
.document(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(Error::from_milli(
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(
|
||||
&index_rtxn,
|
||||
std::iter::once(docid),
|
||||
)
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
),
|
||||
Some(uid.to_string()),
|
||||
));
|
||||
};
|
||||
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
// We compress the documents before sending them
|
||||
let mut encoder =
|
||||
GzEncoder::new(&mut compressed_buffer, Compression::default());
|
||||
encoder
|
||||
.write_all(&buffer)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
encoder
|
||||
.finish()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
request = request.set("Content-Encoding", "gzip");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
|
||||
if i > 0 && i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
if let Some(bearer) = &bearer {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_bytes(&buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| {
|
||||
Error::from_milli(
|
||||
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
|
||||
Some(uid.to_string()),
|
||||
)
|
||||
})?;
|
||||
for result in results {
|
||||
result?;
|
||||
}
|
||||
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
|
||||
where
|
||||
F: Fn() -> Result<ureq::Response, backoff::Error<ureq::Error>>,
|
||||
{
|
||||
match backoff::retry(ExponentialBackoff::default(), || {
|
||||
if must_stop_processing.get() {
|
||||
return Err(backoff::Error::Permanent(ureq::Error::Status(
|
||||
u16::MAX,
|
||||
// 444: Connection Closed Without Response
|
||||
Response::new(444, "Abort", "Aborted task").unwrap(),
|
||||
)));
|
||||
}
|
||||
send_request()
|
||||
}) {
|
||||
Ok(response) => Ok(response),
|
||||
Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)),
|
||||
Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)),
|
||||
}
|
||||
}
|
||||
|
||||
fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> {
|
||||
match err {
|
||||
// Those code status must trigger an automatic retry
|
||||
// <https://www.restapitutorial.com/advanced/responses/retries>
|
||||
ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => {
|
||||
backoff::Error::Transient { err, retry_after: None }
|
||||
}
|
||||
ureq::Error::Status(_, _) => backoff::Error::Permanent(err),
|
||||
ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None },
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a `ureq::Error` into an `Error`.
|
||||
fn ureq_error_into_error(error: ureq::Error) -> Error {
|
||||
#[derive(Deserialize)]
|
||||
struct MeiliError {
|
||||
message: String,
|
||||
code: String,
|
||||
r#type: String,
|
||||
link: String,
|
||||
}
|
||||
|
||||
match error {
|
||||
// This is a workaround to handle task abortion - the error propagation path
|
||||
// makes it difficult to cleanly surface the abortion at this level.
|
||||
ureq::Error::Status(u16::MAX, _) => Error::AbortedTask,
|
||||
ureq::Error::Status(_, response) => match response.into_json() {
|
||||
Ok(MeiliError { message, code, r#type, link }) => {
|
||||
Error::FromRemoteWhenExporting { message, code, r#type, link }
|
||||
}
|
||||
Err(e) => e.into(),
|
||||
},
|
||||
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
|
||||
}
|
||||
}
|
||||
|
||||
enum ExportIndex {}
|
@ -1,11 +1,13 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli::documents::PrimaryKey;
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::Index;
|
||||
@ -24,7 +26,7 @@ impl IndexScheduler {
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index, progress),
|
||||
skip(self, index_wtxn, index, progress, embedder_stats),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
pub(crate) fn apply_index_operation<'i>(
|
||||
@ -33,6 +35,7 @@ impl IndexScheduler {
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
progress: &Progress,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||
let indexer_alloc = Bump::new();
|
||||
let started_processing_at = std::time::Instant::now();
|
||||
@ -86,8 +89,9 @@ impl IndexScheduler {
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
@ -113,18 +117,8 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
|
||||
let (document_changes, operation_stats, primary_key) = indexer
|
||||
@ -187,6 +181,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
|
||||
);
|
||||
@ -266,18 +261,8 @@ impl IndexScheduler {
|
||||
|
||||
let mut congestion = None;
|
||||
if task.error.is_none() {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
let candidates_count = candidates.len();
|
||||
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
|
||||
@ -290,8 +275,9 @@ impl IndexScheduler {
|
||||
})
|
||||
.unwrap()?;
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentEditionProgress::Indexing);
|
||||
@ -308,6 +294,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
@ -429,18 +416,8 @@ impl IndexScheduler {
|
||||
|
||||
let mut congestion = None;
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
@ -448,8 +425,9 @@ impl IndexScheduler {
|
||||
indexer.delete_documents_by_docids(to_delete);
|
||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::Indexing);
|
||||
@ -466,6 +444,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
@ -498,14 +477,11 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
progress.update_progress(SettingsProgress::ApplyTheSettings);
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
let congestion = builder
|
||||
.execute(&|| must_stop_processing.get(), progress, embedder_stats)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
Ok((tasks, None))
|
||||
Ok((tasks, congestion))
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
@ -521,6 +497,7 @@ impl IndexScheduler {
|
||||
tasks: cleared_tasks,
|
||||
},
|
||||
progress,
|
||||
embedder_stats.clone(),
|
||||
)?;
|
||||
|
||||
let (settings_tasks, _congestion) = self.apply_index_operation(
|
||||
@ -528,6 +505,7 @@ impl IndexScheduler {
|
||||
index,
|
||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||
progress,
|
||||
embedder_stats,
|
||||
)?;
|
||||
|
||||
let mut tasks = settings_tasks;
|
||||
|
@ -0,0 +1,17 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
expression: config.embedder_options
|
||||
---
|
||||
{
|
||||
"Rest": {
|
||||
"api_key": "My super secret",
|
||||
"distribution": null,
|
||||
"dimensions": 4,
|
||||
"url": "http://localhost:7777",
|
||||
"request": "{{text}}",
|
||||
"search_fragments": {},
|
||||
"indexing_fragments": {},
|
||||
"response": "{{embedding}}",
|
||||
"headers": {}
|
||||
}
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: simple_hf_config.embedder_options
|
||||
---
|
||||
{
|
||||
"HuggingFace": {
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"distribution": null,
|
||||
"pooling": "useModel"
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
---
|
||||
{
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
||||
0.3
|
||||
]
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
---
|
||||
{
|
||||
"doggo": "kefir",
|
||||
"breed": "patou",
|
||||
"_vectors": {
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
||||
0.3
|
||||
]
|
||||
}
|
||||
}
|
@ -1,12 +1,17 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: simple_hf_config.embedder_options
|
||||
expression: fakerest_config.embedder_options
|
||||
---
|
||||
{
|
||||
"HuggingFace": {
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"Rest": {
|
||||
"api_key": "My super secret",
|
||||
"distribution": null,
|
||||
"pooling": "useModel"
|
||||
"dimensions": 384,
|
||||
"url": "http://localhost:7777",
|
||||
"request": "{{text}}",
|
||||
"search_fragments": {},
|
||||
"indexing_fragments": {},
|
||||
"response": "{{embedding}}",
|
||||
"headers": {}
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
@ -3,11 +3,11 @@ use std::collections::BTreeMap;
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::settings::SettingEmbeddingSettings;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
@ -690,11 +690,20 @@ fn test_settings_update() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"default");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(config.embedder_options);
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -732,6 +741,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@ -765,6 +775,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@ -805,6 +816,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@ -846,6 +858,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
|
@ -3,13 +3,14 @@ use std::collections::BTreeMap;
|
||||
use big_s::S;
|
||||
use insta::assert_json_snapshot;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||
use meilisearch_types::milli::vector::SearchQuery;
|
||||
use meilisearch_types::milli::{self, obkv_to_json};
|
||||
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use milli::update::IndexDocumentsMethod::*;
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::read_json;
|
||||
@ -85,28 +86,51 @@ fn import_vectors() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
|
||||
configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||
let fakerest_name = name.clone();
|
||||
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
|
||||
configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
|
||||
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
|
||||
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
|
||||
let hf_runtime = configs.get(&simple_hf_name).unwrap();
|
||||
let hf_embedder = &hf_runtime.embedder;
|
||||
let beagle_embed = hf_embedder
|
||||
.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
|
||||
.unwrap();
|
||||
let lab_embed =
|
||||
hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
|
||||
let patou_embed = hf_embedder
|
||||
.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
|
||||
.unwrap();
|
||||
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
|
||||
};
|
||||
|
||||
@ -166,22 +190,38 @@ fn import_vectors() {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -239,25 +279,41 @@ fn import_vectors() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let embedders = index.embedding_configs();
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
// automatically changed to patou because set to regenerate
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
|
||||
// remained beagle
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -399,8 +455,8 @@ fn import_vectors_first_and_embedder_later() {
|
||||
.collect::<Vec<_>>();
|
||||
// the all the vectors linked to the new specified embedder have been removed
|
||||
// Only the unknown embedders stays in the document DB
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
// even though we specified the vector for the ID 3, it shouldn't be marked
|
||||
// as user provided since we explicitely marked it as NOT user provided.
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
@ -426,19 +482,28 @@ fn import_vectors_first_and_embedder_later() {
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[1, 2]>,
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let info =
|
||||
index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");
|
||||
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embeddings = &embeddings["my_doggo_embedder"];
|
||||
let (embeddings, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
@ -493,7 +558,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
|
||||
@ -501,7 +566,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
// the document with the id 4 should generate an embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
}
|
||||
@ -603,33 +668,35 @@ fn delete_document_containing_vector() {
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
quantized: None,
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
quantized: None,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["manual"];
|
||||
let (embedding, _) = &embeddings["manual"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
index_scheduler
|
||||
@ -647,30 +714,32 @@ fn delete_document_containing_vector() {
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
quantized: None,
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
quantized: None,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -800,7 +869,7 @@ fn delete_embedder_with_user_provided_vectors() {
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
@ -835,6 +904,6 @@ fn delete_embedder_with_user_provided_vectors() {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// FIXME: redaction
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###);
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ pub(crate) enum FailureLocation {
|
||||
InsideCreateBatch,
|
||||
InsideProcessBatch,
|
||||
PanicInsideProcessBatch,
|
||||
ProcessExport,
|
||||
ProcessUpgrade,
|
||||
AcquiringWtxn,
|
||||
UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
|
||||
|
@ -1,7 +1,9 @@
|
||||
//! Utility functions on the DBs. Mainly getter and setters.
|
||||
|
||||
use crate::milli::progress::EmbedderStats;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
@ -27,6 +29,7 @@ pub struct ProcessingBatch {
|
||||
pub uid: BatchId,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
pub embedder_stats: Arc<EmbedderStats>,
|
||||
|
||||
pub statuses: HashSet<Status>,
|
||||
pub kinds: HashSet<Kind>,
|
||||
@ -48,6 +51,7 @@ impl ProcessingBatch {
|
||||
uid,
|
||||
details: DetailsView::default(),
|
||||
stats: BatchStats::default(),
|
||||
embedder_stats: Default::default(),
|
||||
|
||||
statuses,
|
||||
kinds: HashSet::default(),
|
||||
@ -146,6 +150,7 @@ impl ProcessingBatch {
|
||||
progress: None,
|
||||
details: self.details.clone(),
|
||||
stats: self.stats.clone(),
|
||||
embedder_stats: self.embedder_stats.as_ref().into(),
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
enqueued_at: self.enqueued_at,
|
||||
@ -273,6 +278,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
K::TaskCancelation { .. }
|
||||
| K::TaskDeletion { .. }
|
||||
| K::DumpCreation { .. }
|
||||
| K::Export { .. }
|
||||
| K::UpgradeDatabase { .. }
|
||||
| K::SnapshotCreation => (),
|
||||
};
|
||||
@ -600,6 +606,9 @@ impl crate::IndexScheduler {
|
||||
Details::Dump { dump_uid: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::DumpCreation);
|
||||
}
|
||||
Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::Export);
|
||||
}
|
||||
Details::UpgradeDatabase { from: _, to: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.5.1"
|
||||
criterion = "0.6.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
|
@ -14,4 +14,6 @@ license.workspace = true
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
once_cell = "1.20"
|
||||
once_cell = "1.21"
|
||||
regex-lite = "0.1.6"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
|
@ -4,9 +4,16 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub use insta;
|
||||
use insta::internals::{Content, ContentPath};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex_lite::Regex;
|
||||
|
||||
static SNAPSHOT_NAMES: Lazy<Mutex<HashMap<PathBuf, usize>>> = Lazy::new(Mutex::default);
|
||||
/// A regex to match UUIDs in messages, specifically looking for the UUID v4 format
|
||||
static UUID_IN_MESSAGE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// Return the md5 hash of the given string
|
||||
pub fn hash_snapshot(snap: &str) -> String {
|
||||
@ -26,6 +33,39 @@ pub fn default_snapshot_settings_for_test<'a>(
|
||||
let filename = path.file_name().unwrap().to_str().unwrap();
|
||||
settings.set_omit_expression(true);
|
||||
|
||||
fn uuid_in_message_redaction(content: Content, _content_path: ContentPath) -> Content {
|
||||
match &content {
|
||||
Content::String(s) => {
|
||||
let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]");
|
||||
Content::String(uuid_replaced.to_string())
|
||||
}
|
||||
_ => content,
|
||||
}
|
||||
}
|
||||
|
||||
fn uuid_in_json_key_redaction(content: Content, _content_path: ContentPath) -> Content {
|
||||
match content {
|
||||
Content::Map(map) => {
|
||||
let new_map = map
|
||||
.iter()
|
||||
.map(|(key, value)| match key {
|
||||
Content::String(s) => {
|
||||
let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]");
|
||||
(Content::String(uuid_replaced.to_string()), value.clone())
|
||||
}
|
||||
_ => (key.clone(), value.clone()),
|
||||
})
|
||||
.collect();
|
||||
Content::Map(new_map)
|
||||
}
|
||||
_ => content,
|
||||
}
|
||||
}
|
||||
|
||||
settings.add_dynamic_redaction(".**.message", uuid_in_message_redaction);
|
||||
settings.add_dynamic_redaction(".**.indexUid", uuid_in_message_redaction);
|
||||
settings.add_dynamic_redaction(".**.facetsByIndex", uuid_in_json_key_redaction);
|
||||
|
||||
let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name);
|
||||
let test_name = test_name.rsplit("::").next().unwrap().to_owned();
|
||||
|
||||
@ -232,6 +272,9 @@ macro_rules! json_string {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate as meili_snap;
|
||||
use crate::UUID_IN_MESSAGE_RE;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[test]
|
||||
fn snap() {
|
||||
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
|
||||
@ -279,4 +322,14 @@ mod tests {
|
||||
// snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uuid_in_message_regex() {
|
||||
let uuid1 = Uuid::new_v4();
|
||||
let uuid2 = Uuid::new_v4();
|
||||
let uuid3 = Uuid::new_v4();
|
||||
let to_replace = format!("1 {uuid1} 2 {uuid2} 3 {uuid3} 4");
|
||||
let replaced = UUID_IN_MESSAGE_RE.replace_all(to_replace.as_str(), "[uuid]");
|
||||
assert_eq!(replaced, "1 [uuid] 2 [uuid] 3 [uuid] 4");
|
||||
}
|
||||
}
|
||||
|
@ -17,10 +17,10 @@ hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
@ -158,7 +158,7 @@ impl AuthController {
|
||||
self.store.delete_all_keys()
|
||||
}
|
||||
|
||||
/// Delete all the keys in the DB.
|
||||
/// Insert a key directly into the store.
|
||||
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
|
||||
self.store.put_api_key(key)?;
|
||||
Ok(())
|
||||
@ -351,6 +351,7 @@ pub struct IndexSearchRules {
|
||||
|
||||
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
||||
store.put_api_key(Key::default_chat())?;
|
||||
store.put_api_key(Key::default_read_only_admin())?;
|
||||
store.put_api_key(Key::default_admin())?;
|
||||
store.put_api_key(Key::default_search())?;
|
||||
|
||||
|
@ -88,7 +88,13 @@ impl HeedAuthStore {
|
||||
let mut actions = HashSet::new();
|
||||
for action in &key.actions {
|
||||
match action {
|
||||
Action::All => actions.extend(enum_iterator::all::<Action>()),
|
||||
Action::All => {
|
||||
actions.extend(enum_iterator::all::<Action>());
|
||||
actions.remove(&Action::AllGet);
|
||||
}
|
||||
Action::AllGet => {
|
||||
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
|
||||
}
|
||||
Action::DocumentsAll => {
|
||||
actions.extend(
|
||||
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]
|
||||
|
@ -11,37 +11,38 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.9.0", default-features = false }
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
actix-web = { version = "4.11.0", default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
either = { version = "1.13.0", features = ["serde"] }
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
flate2 = "1.1.2"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
rustc-hash = "2.1.0"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.43"
|
||||
utoipa = { version = "5.3.1", features = ["macros"] }
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tokio = "1.45"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
|
@ -3,7 +3,7 @@ use serde::Serialize;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::batches::{Batch, BatchId, BatchStats};
|
||||
use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView};
|
||||
use crate::task_view::DetailsView;
|
||||
use crate::tasks::serialize_duration;
|
||||
|
||||
@ -14,7 +14,7 @@ pub struct BatchView {
|
||||
pub uid: BatchId,
|
||||
pub progress: Option<ProgressView>,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
pub stats: BatchStatsView,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339", default)]
|
||||
@ -25,13 +25,26 @@ pub struct BatchView {
|
||||
pub batch_strategy: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct BatchStatsView {
|
||||
#[serde(flatten)]
|
||||
pub stats: BatchStats,
|
||||
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
|
||||
pub embedder_requests: EmbedderStatsView,
|
||||
}
|
||||
|
||||
impl BatchView {
|
||||
pub fn from_batch(batch: &Batch) -> Self {
|
||||
Self {
|
||||
uid: batch.uid,
|
||||
progress: batch.progress.clone(),
|
||||
details: batch.details.clone(),
|
||||
stats: batch.stats.clone(),
|
||||
stats: BatchStatsView {
|
||||
stats: batch.stats.clone(),
|
||||
embedder_requests: batch.embedder_stats.clone(),
|
||||
},
|
||||
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::progress::ProgressView;
|
||||
use milli::progress::{EmbedderStats, ProgressView};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use utoipa::ToSchema;
|
||||
@ -19,6 +19,8 @@ pub struct Batch {
|
||||
pub progress: Option<ProgressView>,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
|
||||
pub embedder_stats: EmbedderStatsView,
|
||||
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub started_at: OffsetDateTime,
|
||||
@ -43,6 +45,7 @@ impl PartialEq for Batch {
|
||||
progress,
|
||||
details,
|
||||
stats,
|
||||
embedder_stats,
|
||||
started_at,
|
||||
finished_at,
|
||||
enqueued_at,
|
||||
@ -53,6 +56,7 @@ impl PartialEq for Batch {
|
||||
&& progress.is_none() == other.progress.is_none()
|
||||
&& details == &other.details
|
||||
&& stats == &other.stats
|
||||
&& embedder_stats == &other.embedder_stats
|
||||
&& started_at == &other.started_at
|
||||
&& finished_at == &other.finished_at
|
||||
&& enqueued_at == &other.enqueued_at
|
||||
@ -83,3 +87,30 @@ pub struct BatchStats {
|
||||
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
|
||||
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct EmbedderStatsView {
|
||||
pub total: usize,
|
||||
pub failed: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub last_error: Option<String>,
|
||||
}
|
||||
|
||||
impl From<&EmbedderStats> for EmbedderStatsView {
|
||||
fn from(stats: &EmbedderStats) -> Self {
|
||||
let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner());
|
||||
Self {
|
||||
total: stats.total_count.load(std::sync::atomic::Ordering::Relaxed),
|
||||
failed: errors.1 as usize,
|
||||
last_error: errors.0.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EmbedderStatsView {
|
||||
pub fn skip_serializing(&self) -> bool {
|
||||
self.total == 0 && self.failed == 0 && self.last_error.is_none()
|
||||
}
|
||||
}
|
||||
|
@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
|
||||
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
@ -301,6 +302,7 @@ InvalidFacetSearchQuery , InvalidRequest , BAD_REQU
|
||||
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
|
||||
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMedia , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
@ -308,6 +310,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
@ -389,6 +392,13 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
|
||||
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
|
||||
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
|
||||
// Export
|
||||
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ;
|
||||
// Experimental features - Chat Completions
|
||||
UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
|
||||
UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
|
||||
@ -406,6 +416,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
|
||||
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
|
||||
}
|
||||
@ -457,6 +468,7 @@ impl ErrorCode for milli::Error {
|
||||
| UserError::MissingSourceForNested { .. }
|
||||
| UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
|
||||
@ -466,7 +478,8 @@ impl ErrorCode for milli::Error {
|
||||
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
|
||||
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
|
||||
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
|
||||
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
|
||||
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
|
||||
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
|
||||
UserError::InvalidSearchableAttribute { .. } => {
|
||||
Code::InvalidSearchAttributesToSearchOn
|
||||
}
|
||||
@ -482,7 +495,8 @@ impl ErrorCode for milli::Error {
|
||||
UserError::InvalidVectorsMapType { .. }
|
||||
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
|
||||
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
|
||||
UserError::SortError(_) => Code::InvalidSearchSort,
|
||||
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
|
||||
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
Code::InvalidSettingsTypoTolerance
|
||||
}
|
||||
|
@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Code, ResponseError};
|
||||
|
||||
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
|
||||
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
|
||||
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
|
||||
"Search the database for relevant JSON documents using an optional query.";
|
||||
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
|
||||
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
|
||||
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
|
||||
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
@ -21,6 +22,7 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub get_task_documents_route: bool,
|
||||
pub composite_embedders: bool,
|
||||
pub chat_completions: bool,
|
||||
pub multimodal: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
@ -114,7 +116,6 @@ pub enum ChatCompletionSource {
|
||||
OpenAi,
|
||||
AzureOpenAi,
|
||||
Mistral,
|
||||
Gemini,
|
||||
VLlm,
|
||||
}
|
||||
|
||||
@ -134,7 +135,6 @@ impl ChatCompletionSource {
|
||||
AzureOpenAi if Self::old_openai_model(model) => System,
|
||||
AzureOpenAi => Developer,
|
||||
Mistral => System,
|
||||
Gemini => System,
|
||||
VLlm => System,
|
||||
}
|
||||
}
|
||||
@ -154,7 +154,6 @@ impl ChatCompletionSource {
|
||||
match self {
|
||||
OpenAi => Some("https://api.openai.com/v1/"),
|
||||
Mistral => Some("https://api.mistral.ai/v1/"),
|
||||
Gemini => Some("https://generativelanguage.googleapis.com/v1beta/openai/"),
|
||||
AzureOpenAi | VLlm => None,
|
||||
}
|
||||
}
|
||||
@ -166,6 +165,7 @@ pub struct ChatCompletionPrompts {
|
||||
pub system: String,
|
||||
pub search_description: String,
|
||||
pub search_q_param: String,
|
||||
pub search_filter_param: String,
|
||||
pub search_index_uid_param: String,
|
||||
}
|
||||
|
||||
@ -175,6 +175,7 @@ impl Default for ChatCompletionPrompts {
|
||||
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
|
||||
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
|
||||
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
|
||||
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
|
||||
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError};
|
||||
|
||||
/// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
|
||||
/// bytes long and optionally ending with a *.
|
||||
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
|
||||
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
|
||||
pub struct IndexUidPattern(String);
|
||||
|
||||
|
@ -144,6 +144,21 @@ impl Key {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_read_only_admin() -> Self {
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let uid = Uuid::new_v4();
|
||||
Self {
|
||||
name: Some("Default Read-Only Admin API Key".to_string()),
|
||||
description: Some("Use it to read information across the whole database. Caution! Do not expose this key on a public frontend".to_string()),
|
||||
uid,
|
||||
actions: vec![Action::AllGet, Action::KeysGet],
|
||||
indexes: vec![IndexUidPattern::all()],
|
||||
expires_at: None,
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_search() -> Self {
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let uid = Uuid::new_v4();
|
||||
@ -218,6 +233,9 @@ pub enum Action {
|
||||
#[serde(rename = "*")]
|
||||
#[deserr(rename = "*")]
|
||||
All = 0,
|
||||
#[serde(rename = "*.get")]
|
||||
#[deserr(rename = "*.get")]
|
||||
AllGet,
|
||||
#[serde(rename = "search")]
|
||||
#[deserr(rename = "search")]
|
||||
Search,
|
||||
@ -317,6 +335,9 @@ pub enum Action {
|
||||
#[serde(rename = "experimental.update")]
|
||||
#[deserr(rename = "experimental.update")]
|
||||
ExperimentalFeaturesUpdate,
|
||||
#[serde(rename = "export")]
|
||||
#[deserr(rename = "export")]
|
||||
Export,
|
||||
#[serde(rename = "network.get")]
|
||||
#[deserr(rename = "network.get")]
|
||||
NetworkGet,
|
||||
@ -396,6 +417,52 @@ impl Action {
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the action should be included in [Action::AllRead].
|
||||
pub fn is_read(&self) -> bool {
|
||||
use Action::*;
|
||||
|
||||
// It's using an exhaustive match to force the addition of new actions.
|
||||
match self {
|
||||
// Any action that expands to others must return false, as it wouldn't be able to expand recursively.
|
||||
All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
|
||||
| StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll => false,
|
||||
|
||||
Search => true,
|
||||
DocumentsAdd => false,
|
||||
DocumentsGet => true,
|
||||
DocumentsDelete => false,
|
||||
Export => true,
|
||||
IndexesAdd => false,
|
||||
IndexesGet => true,
|
||||
IndexesUpdate => false,
|
||||
IndexesDelete => false,
|
||||
IndexesSwap => false,
|
||||
TasksCancel => false,
|
||||
TasksDelete => false,
|
||||
TasksGet => true,
|
||||
SettingsGet => true,
|
||||
SettingsUpdate => false,
|
||||
StatsGet => true,
|
||||
MetricsGet => true,
|
||||
DumpsCreate => false,
|
||||
SnapshotsCreate => false,
|
||||
Version => true,
|
||||
KeysAdd => false,
|
||||
KeysGet => false, // Disabled in order to prevent privilege escalation
|
||||
KeysUpdate => false,
|
||||
KeysDelete => false,
|
||||
ExperimentalFeaturesGet => true,
|
||||
ExperimentalFeaturesUpdate => false,
|
||||
NetworkGet => true,
|
||||
NetworkUpdate => false,
|
||||
ChatCompletions => false, // Disabled because it might trigger generation of new chats
|
||||
ChatsGet => true,
|
||||
ChatsDelete => false,
|
||||
ChatsSettingsGet => true,
|
||||
ChatsSettingsUpdate => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn repr(&self) -> u8 {
|
||||
*self as u8
|
||||
}
|
||||
@ -405,6 +472,7 @@ pub mod actions {
|
||||
use super::Action::*;
|
||||
|
||||
pub(crate) const ALL: u8 = All.repr();
|
||||
pub const ALL_GET: u8 = AllGet.repr();
|
||||
pub const SEARCH: u8 = Search.repr();
|
||||
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
|
||||
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
|
||||
@ -438,6 +506,8 @@ pub mod actions {
|
||||
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
|
||||
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
|
||||
|
||||
pub const EXPORT: u8 = Export.repr();
|
||||
|
||||
pub const NETWORK_GET: u8 = NetworkGet.repr();
|
||||
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
|
||||
|
||||
|
@ -18,7 +18,7 @@ pub mod versioning;
|
||||
pub use milli::{heed, Index};
|
||||
use uuid::Uuid;
|
||||
pub use versioning::VERSION_FILE_NAME;
|
||||
pub use {milli, serde_cs};
|
||||
pub use {byte_unit, milli, serde_cs};
|
||||
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type InstanceUid = Uuid;
|
||||
|
@ -9,10 +9,11 @@ use std::str::FromStr;
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use fst::IntoStreamer;
|
||||
use milli::disabled_typos_terms::DisabledTyposTerms;
|
||||
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
|
||||
use milli::index::PrefixSearch;
|
||||
use milli::proximity::ProximityPrecision;
|
||||
pub use milli::update::ChatSettings;
|
||||
use milli::update::Setting;
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use utoipa::ToSchema;
|
||||
@ -500,8 +501,11 @@ impl Settings<Unchecked> {
|
||||
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
||||
for (name, config) in configs.iter_mut() {
|
||||
let config_to_check = std::mem::take(config);
|
||||
let checked_config =
|
||||
milli::update::validate_embedding_settings(config_to_check.inner, name)?;
|
||||
let checked_config = milli::update::validate_embedding_settings(
|
||||
config_to_check.inner,
|
||||
name,
|
||||
milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate,
|
||||
)?;
|
||||
*config = SettingEmbeddingSettings { inner: checked_config };
|
||||
}
|
||||
self.embedders = Setting::Set(configs);
|
||||
@ -697,7 +701,7 @@ pub fn apply_settings_to_builder(
|
||||
match typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
Setting::Set(val) => builder.set_authorize_typos(val),
|
||||
Setting::Reset => builder.reset_authorize_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@ -751,6 +755,7 @@ pub fn apply_settings_to_builder(
|
||||
builder.reset_min_word_len_two_typos();
|
||||
builder.reset_exact_words();
|
||||
builder.reset_exact_attributes();
|
||||
builder.reset_disable_on_numbers();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@ -910,6 +915,7 @@ pub fn settings(
|
||||
};
|
||||
|
||||
let embedders: BTreeMap<_, _> = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(rtxn)?
|
||||
.into_iter()
|
||||
.map(|IndexEmbeddingConfig { name, config, .. }| {
|
||||
@ -968,6 +974,7 @@ pub fn settings(
|
||||
if let SecretPolicy::HideSecrets = secret_policy {
|
||||
settings.hide_secrets()
|
||||
}
|
||||
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use byte_unit::UnitType;
|
||||
use milli::Object;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
@ -6,7 +9,9 @@ use utoipa::ToSchema;
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@ -118,6 +123,15 @@ pub struct DetailsView {
|
||||
pub upgrade_from: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub upgrade_to: Option<String>,
|
||||
// exporting
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub url: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub api_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub payload_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@ -238,6 +252,34 @@ impl DetailsView {
|
||||
Some(left)
|
||||
}
|
||||
},
|
||||
url: match (self.url.clone(), other.url.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(url)) | (Some(url), None) => Some(url),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
api_key: match (self.api_key.clone(), other.api_key.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(key)) | (Some(key), None) => Some(key),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
indexes: match (self.indexes.clone(), other.indexes.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
// We want the earliest version
|
||||
upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
|
||||
(None, None) => None,
|
||||
@ -327,6 +369,22 @@ impl From<Details> for DetailsView {
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
Details::Export { url, api_key, payload_size, indexes } => DetailsView {
|
||||
url: Some(url),
|
||||
api_key: api_key.map(|mut api_key| {
|
||||
hide_secret(&mut api_key);
|
||||
api_key
|
||||
}),
|
||||
payload_size: payload_size
|
||||
.map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
indexes: Some(
|
||||
indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| (pattern.to_string(), settings))
|
||||
.collect(),
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
Details::UpgradeDatabase { from, to } => DetailsView {
|
||||
upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
|
||||
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
|
||||
@ -335,3 +393,21 @@ impl From<Details> for DetailsView {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We definitely need to factorize the code to hide the secret key
|
||||
fn hide_secret(secret: &mut String) {
|
||||
match secret.len() {
|
||||
x if x < 10 => {
|
||||
secret.replace_range(.., "XXX...");
|
||||
}
|
||||
x if x < 20 => {
|
||||
secret.replace_range(2.., "XXXX...");
|
||||
}
|
||||
x if x < 30 => {
|
||||
secret.replace_range(3.., "XXXXX...");
|
||||
}
|
||||
_x => {
|
||||
secret.replace_range(5.., "XXXXXX...");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,22 @@
|
||||
use core::fmt;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::fmt::{Display, Write};
|
||||
use std::str::FromStr;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use enum_iterator::Sequence;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use milli::Object;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use serde_json::Value;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use utoipa::ToSchema;
|
||||
use utoipa::{schema, ToSchema};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::index_uid_pattern::IndexUidPattern;
|
||||
use crate::keys::Key;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::{versioning, InstanceUid};
|
||||
@ -50,6 +53,7 @@ impl Task {
|
||||
| SnapshotCreation
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
@ -86,6 +90,7 @@ impl Task {
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. } => None,
|
||||
}
|
||||
}
|
||||
@ -108,11 +113,11 @@ pub enum KindWithContent {
|
||||
},
|
||||
DocumentDeletionByFilter {
|
||||
index_uid: String,
|
||||
filter_expr: serde_json::Value,
|
||||
filter_expr: Value,
|
||||
},
|
||||
DocumentEdition {
|
||||
index_uid: String,
|
||||
filter_expr: Option<serde_json::Value>,
|
||||
filter_expr: Option<Value>,
|
||||
context: Option<milli::Object>,
|
||||
function: String,
|
||||
},
|
||||
@ -152,6 +157,12 @@ pub enum KindWithContent {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
@ -163,6 +174,13 @@ pub struct IndexSwap {
|
||||
pub indexes: (String, String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExportIndexSettings {
|
||||
pub filter: Option<Value>,
|
||||
pub override_settings: bool,
|
||||
}
|
||||
|
||||
impl KindWithContent {
|
||||
pub fn as_kind(&self) -> Kind {
|
||||
match self {
|
||||
@ -180,6 +198,7 @@ impl KindWithContent {
|
||||
KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
|
||||
KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
|
||||
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
}
|
||||
}
|
||||
@ -192,6 +211,7 @@ impl KindWithContent {
|
||||
| SnapshotCreation
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. } => vec![],
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@ -269,6 +289,14 @@ impl KindWithContent {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: (from.0, from.1, from.2),
|
||||
to: (
|
||||
@ -335,6 +363,14 @@ impl KindWithContent {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: *from,
|
||||
to: (
|
||||
@ -383,6 +419,14 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: *from,
|
||||
to: (
|
||||
@ -499,6 +543,7 @@ pub enum Kind {
|
||||
TaskDeletion,
|
||||
DumpCreation,
|
||||
SnapshotCreation,
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
}
|
||||
|
||||
@ -516,6 +561,7 @@ impl Kind {
|
||||
| Kind::TaskCancelation
|
||||
| Kind::TaskDeletion
|
||||
| Kind::DumpCreation
|
||||
| Kind::Export
|
||||
| Kind::UpgradeDatabase
|
||||
| Kind::SnapshotCreation => false,
|
||||
}
|
||||
@ -536,6 +582,7 @@ impl Display for Kind {
|
||||
Kind::TaskDeletion => write!(f, "taskDeletion"),
|
||||
Kind::DumpCreation => write!(f, "dumpCreation"),
|
||||
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
}
|
||||
}
|
||||
@ -568,6 +615,8 @@ impl FromStr for Kind {
|
||||
Ok(Kind::DumpCreation)
|
||||
} else if kind.eq_ignore_ascii_case("snapshotCreation") {
|
||||
Ok(Kind::SnapshotCreation)
|
||||
} else if kind.eq_ignore_ascii_case("export") {
|
||||
Ok(Kind::Export)
|
||||
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else {
|
||||
@ -643,12 +692,33 @@ pub enum Details {
|
||||
IndexSwap {
|
||||
swaps: Vec<IndexSwap>,
|
||||
},
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<IndexUidPattern, DetailsExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct DetailsExportIndexSettings {
|
||||
#[serde(flatten)]
|
||||
pub settings: ExportIndexSettings,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_documents: Option<u64>,
|
||||
}
|
||||
|
||||
impl From<ExportIndexSettings> for DetailsExportIndexSettings {
|
||||
fn from(settings: ExportIndexSettings) -> Self {
|
||||
DetailsExportIndexSettings { settings, matched_documents: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Details {
|
||||
pub fn to_failed(&self) -> Self {
|
||||
let mut details = self.clone();
|
||||
@ -667,6 +737,7 @@ impl Details {
|
||||
Self::SettingsUpdate { .. }
|
||||
| Self::IndexInfo { .. }
|
||||
| Self::Dump { .. }
|
||||
| Self::Export { .. }
|
||||
| Self::UpgradeDatabase { .. }
|
||||
| Self::IndexSwap { .. } => (),
|
||||
}
|
||||
|
@ -13,51 +13,50 @@ license.workspace = true
|
||||
default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.0"
|
||||
actix-http = { version = "3.9.0", default-features = false, features = [
|
||||
actix-cors = "0.7.1"
|
||||
actix-http = { version = "3.11.0", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.9.0", default-features = false, features = [
|
||||
actix-web = { version = "4.11.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
anyhow = { version = "1.0.95", features = ["backtrace"] }
|
||||
async-trait = "0.1.85"
|
||||
bstr = "1.11.3"
|
||||
anyhow = { version = "1.0.98", features = ["backtrace"] }
|
||||
bstr = "1.12.0"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
bytes = "1.9.0"
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.24", features = ["derive", "env"] }
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.13.0"
|
||||
either = "1.15.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
flate2 = "1.1.2"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.7.0", features = ["serde"] }
|
||||
is-terminal = "0.4.13"
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
is-terminal = "0.4.16"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.16.0"
|
||||
num_cpus = "1.17.0"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.20.2"
|
||||
ordered-float = "4.6.0"
|
||||
parking_lot = "0.12.3"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
parking_lot = "0.12.4"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.16"
|
||||
platform-dirs = "0.3.0"
|
||||
@ -65,44 +64,44 @@ prometheus = { version = "0.14.0", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.12", features = [
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = { version = "0.23.20", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
|
||||
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
|
||||
rustls-pemfile = "2.2.0"
|
||||
segment = { version = "0.2.5" }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
segment = { version = "0.2.6" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
siphasher = "1.0.1"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.4", optional = true }
|
||||
sysinfo = "0.33.1"
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
sysinfo = "0.35.2"
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.43.1", features = ["full"] }
|
||||
toml = "0.8.19"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.19", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.15"
|
||||
tracing-actix-web = "0.7.18"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
roaring = "0.10.10"
|
||||
roaring = "0.10.12"
|
||||
mopa-maintained = "0.2.3"
|
||||
utoipa = { version = "5.3.1", features = [
|
||||
utoipa = { version = "5.4.0", features = [
|
||||
"actix_extras",
|
||||
"macros",
|
||||
"non_strict_integers",
|
||||
@ -118,29 +117,29 @@ actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
brotli = "6.0.0"
|
||||
brotli = "8.0.1"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
insta = { version = "=1.39.0", features = ["redactions"] }
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
urlencoding = "2.1.3"
|
||||
wiremock = "0.6.2"
|
||||
wiremock = "0.6.3"
|
||||
yaup = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.95", optional = true }
|
||||
cargo_toml = { version = "0.21.0", optional = true }
|
||||
anyhow = { version = "1.0.98", optional = true }
|
||||
cargo_toml = { version = "0.22.1", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.12.12", features = [
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.4", optional = true }
|
||||
tempfile = { version = "3.15.0", optional = true }
|
||||
zip = { version = "2.3.0", optional = true }
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
tempfile = { version = "3.20.0", optional = true }
|
||||
zip = { version = "4.1.0", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
@ -170,5 +169,5 @@ german = ["meilisearch-types/german"]
|
||||
turkish = ["meilisearch-types/turkish"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
|
||||
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
|
||||
sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"
|
||||
|
@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
|
||||
_request: &HttpRequest,
|
||||
) {
|
||||
}
|
||||
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
||||
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
||||
}
|
||||
|
@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
|
||||
PerFilter,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum DocumentFetchKind {
|
||||
PerDocumentId { retrieve_vectors: bool },
|
||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
|
||||
}
|
||||
|
||||
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
|
||||
pub trait Aggregate: 'static + mopa::Any + Send {
|
||||
/// The name of the event that will be sent to segment.
|
||||
|
@ -197,11 +197,13 @@ struct Infos {
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
experimental_composite_embedders: bool,
|
||||
experimental_embedding_cache_entries: usize,
|
||||
experimental_no_snapshot_compaction: bool,
|
||||
experimental_no_edition_2024_for_settings: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@ -286,8 +288,12 @@ impl Infos {
|
||||
ScheduleSnapshot::Enabled(interval) => Some(interval),
|
||||
};
|
||||
|
||||
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
|
||||
indexer_options;
|
||||
let IndexerOpts {
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
} = indexer_options;
|
||||
|
||||
let RuntimeTogglableFeatures {
|
||||
metrics,
|
||||
@ -298,6 +304,7 @@ impl Infos {
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
@ -317,6 +324,7 @@ impl Infos {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_chat_completions: chat_completions,
|
||||
experimental_multimodal: multimodal,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
experimental_embedding_cache_entries,
|
||||
@ -350,6 +358,7 @@ impl Infos {
|
||||
ssl_require_auth,
|
||||
ssl_resumption,
|
||||
ssl_tickets,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
|
||||
TooManySearchRequests(usize),
|
||||
#[error("Internal error: Search limiter is down.")]
|
||||
SearchLimiterIsDown,
|
||||
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
|
||||
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
|
||||
PayloadTooLarge(usize),
|
||||
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
|
||||
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
|
||||
@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
|
||||
MissingSearchHybrid,
|
||||
#[error("Invalid request: both `media` and `vector` parameters are present.")]
|
||||
MediaAndVector,
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||
MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
|
||||
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
|
||||
Code::InvalidMultiSearchFederationOptions
|
||||
}
|
||||
|
@ -37,7 +37,10 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use meilisearch_auth::{open_auth_store_env, AuthController};
|
||||
use meilisearch_types::milli::constants::VERSION_MAJOR;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::{
|
||||
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
|
||||
};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::versioning::{
|
||||
@ -461,6 +464,7 @@ fn import_dump(
|
||||
index_scheduler: &mut IndexScheduler,
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let progress = Progress::default();
|
||||
let reader = File::open(dump_path)?;
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
@ -494,19 +498,37 @@ fn import_dump(
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
// 3. Import the runtime features and network
|
||||
// 3. Import the `ChatCompletionSettings`s.
|
||||
for result in dump_reader.chat_completions_settings()? {
|
||||
let (name, settings) = result?;
|
||||
index_scheduler.put_chat_settings(&name, &settings)?;
|
||||
}
|
||||
|
||||
// 4. Import the runtime features and network
|
||||
let features = dump_reader.features()?.unwrap_or_default();
|
||||
index_scheduler.put_runtime_features(features)?;
|
||||
|
||||
let network = dump_reader.network()?.cloned().unwrap_or_default();
|
||||
index_scheduler.put_network(network)?;
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
// 4.1 Use all cpus to process dump if `max_indexing_threads` not configured
|
||||
let backup_config;
|
||||
let base_config = index_scheduler.indexer_config();
|
||||
|
||||
let indexer_config = if base_config.max_threads.is_none() {
|
||||
let (thread_pool, _) = default_thread_pool_and_threads();
|
||||
|
||||
let _config = IndexerConfig { thread_pool, ..*base_config };
|
||||
backup_config = _config;
|
||||
&backup_config
|
||||
} else {
|
||||
base_config
|
||||
};
|
||||
|
||||
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
||||
// try to process tasks while we're trying to import the indexes.
|
||||
|
||||
// 4. Import the indexes.
|
||||
// 5. Import the indexes.
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
@ -519,20 +541,20 @@ fn import_dump(
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
|
||||
// 4.1 Import the primary key if there is one.
|
||||
// 5.1 Import the primary key if there is one.
|
||||
if let Some(ref primary_key) = metadata.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
// 4.2 Import the settings.
|
||||
// 5.2 Import the settings.
|
||||
tracing::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder
|
||||
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
||||
let embedder_stats: Arc<EmbedderStats> = Default::default();
|
||||
builder.execute(&|| false, &progress, embedder_stats.clone())?;
|
||||
|
||||
// 4.3 Import the documents.
|
||||
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
// 5.3 Import the documents.
|
||||
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
@ -543,11 +565,11 @@ fn import_dump(
|
||||
// This flush the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 4.3.2 We feed it to the milli index.
|
||||
// 5.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs(&wtxn)?;
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
@ -560,6 +582,7 @@ fn import_dump(
|
||||
},
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
&embedder_stats,
|
||||
)?;
|
||||
|
||||
let builder = builder.with_embedders(embedders);
|
||||
@ -574,15 +597,15 @@ fn import_dump(
|
||||
index_scheduler.refresh_index_stats(&uid)?;
|
||||
}
|
||||
|
||||
// 5. Import the queue
|
||||
// 6. Import the queue
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
// 5.1. Import the batches
|
||||
// 6.1. Import the batches
|
||||
for ret in dump_reader.batches()? {
|
||||
let batch = ret?;
|
||||
index_scheduler_dump.register_dumped_batch(batch)?;
|
||||
}
|
||||
|
||||
// 5.2. Import the tasks
|
||||
// 6.2. Import the tasks
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler_dump.register_dumped_task(task, file)?;
|
||||
|
@ -15,6 +15,33 @@ lazy_static! {
|
||||
"Meilisearch number of degraded search requests"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
|
||||
opts!(
|
||||
"meilisearch_chat_search_requests",
|
||||
"Meilisearch number of search requests performed by the chat route itself"
|
||||
),
|
||||
&["type"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
|
||||
opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
|
||||
register_int_counter_vec!(
|
||||
opts!(
|
||||
"meilisearch_chat_completion_tokens_usage",
|
||||
"Meilisearch Chat Completion Tokens Usage"
|
||||
),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
|
||||
opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
|
||||
.expect("Can't create a metric");
|
||||
|
@ -53,6 +53,8 @@ const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_U
|
||||
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
||||
@ -62,7 +64,7 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
|
||||
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
|
||||
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE";
|
||||
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
|
||||
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
|
||||
@ -749,22 +751,43 @@ pub struct IndexerOpts {
|
||||
#[clap(skip)]
|
||||
#[serde(skip)]
|
||||
pub skip_index_budget: bool,
|
||||
|
||||
/// Experimental no edition 2024 for settings feature. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/847>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 for settings feature.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_settings: bool,
|
||||
}
|
||||
|
||||
impl IndexerOpts {
|
||||
/// Exports the values to their corresponding env vars if they are not set.
|
||||
pub fn export_to_env(self) {
|
||||
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
|
||||
let IndexerOpts {
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
} = self;
|
||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_MEMORY,
|
||||
max_indexing_memory.to_string(),
|
||||
);
|
||||
}
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_THREADS,
|
||||
max_indexing_threads.0.to_string(),
|
||||
);
|
||||
if let Some(max_indexing_threads) = max_indexing_threads.0 {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_THREADS,
|
||||
max_indexing_threads.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_settings {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS,
|
||||
experimental_no_edition_2024_for_settings.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -772,18 +795,23 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
|
||||
let thread_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|index| format!("indexing-thread:{index}"))
|
||||
.num_threads(*other.max_indexing_threads)
|
||||
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
|
||||
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
|
||||
.build()?;
|
||||
|
||||
Ok(Self {
|
||||
thread_pool,
|
||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
|
||||
thread_pool: Some(thread_pool),
|
||||
max_threads: *other.max_indexing_threads,
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
..Default::default()
|
||||
experimental_no_edition_2024_for_settings: other
|
||||
.experimental_no_edition_2024_for_settings,
|
||||
chunk_compression_type: Default::default(),
|
||||
chunk_compression_level: Default::default(),
|
||||
documents_chunk_size: Default::default(),
|
||||
max_nb_chunks: Default::default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -843,31 +871,31 @@ fn total_memory_bytes() -> Option<u64> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
|
||||
pub struct MaxThreads(usize);
|
||||
#[derive(Default, Debug, Clone, Copy, Deserialize, Serialize)]
|
||||
pub struct MaxThreads(Option<usize>);
|
||||
|
||||
impl FromStr for MaxThreads {
|
||||
type Err = ParseIntError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
usize::from_str(s).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MaxThreads {
|
||||
fn default() -> Self {
|
||||
MaxThreads(num_cpus::get() / 2)
|
||||
fn from_str(s: &str) -> Result<MaxThreads, Self::Err> {
|
||||
if s.is_empty() || s == "unlimited" {
|
||||
return Ok(MaxThreads::default());
|
||||
}
|
||||
usize::from_str(s).map(Some).map(MaxThreads)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MaxThreads {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
match self.0 {
|
||||
Some(threads) => write!(f, "{}", threads),
|
||||
None => write!(f, "unlimited"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MaxThreads {
|
||||
type Target = usize;
|
||||
type Target = Option<usize>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
|
135
crates/meilisearch/src/routes/chats/chat_completion_analytics.rs
Normal file
135
crates/meilisearch/src/routes/chats/chat_completion_analytics.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::analytics::Aggregate;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ChatCompletionAggregator {
|
||||
// requests
|
||||
total_received: usize,
|
||||
total_succeeded: usize,
|
||||
time_spent: BinaryHeap<usize>,
|
||||
|
||||
// chat completion specific metrics
|
||||
total_messages: usize,
|
||||
total_streamed_requests: usize,
|
||||
total_non_streamed_requests: usize,
|
||||
|
||||
// model usage tracking
|
||||
models_used: std::collections::HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl ChatCompletionAggregator {
|
||||
pub fn from_request(model: &str, message_count: usize, is_stream: bool) -> Self {
|
||||
let mut models_used = std::collections::HashMap::new();
|
||||
models_used.insert(model.to_string(), 1);
|
||||
|
||||
Self {
|
||||
total_received: 1,
|
||||
total_succeeded: 0,
|
||||
time_spent: BinaryHeap::new(),
|
||||
|
||||
total_messages: message_count,
|
||||
total_streamed_requests: if is_stream { 1 } else { 0 },
|
||||
total_non_streamed_requests: if is_stream { 0 } else { 1 },
|
||||
|
||||
models_used,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, time_spent: std::time::Duration) {
|
||||
self.total_succeeded += 1;
|
||||
self.time_spent.push(time_spent.as_millis() as usize);
|
||||
}
|
||||
}
|
||||
|
||||
impl Aggregate for ChatCompletionAggregator {
|
||||
fn event_name(&self) -> &'static str {
|
||||
"Chat Completion POST"
|
||||
}
|
||||
|
||||
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||
let Self {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
mut time_spent,
|
||||
total_messages,
|
||||
total_streamed_requests,
|
||||
total_non_streamed_requests,
|
||||
models_used,
|
||||
..
|
||||
} = *new;
|
||||
|
||||
// Aggregate time spent
|
||||
self.time_spent.append(&mut time_spent);
|
||||
|
||||
// Aggregate counters
|
||||
self.total_received = self.total_received.saturating_add(total_received);
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||
self.total_messages = self.total_messages.saturating_add(total_messages);
|
||||
self.total_streamed_requests =
|
||||
self.total_streamed_requests.saturating_add(total_streamed_requests);
|
||||
self.total_non_streamed_requests =
|
||||
self.total_non_streamed_requests.saturating_add(total_non_streamed_requests);
|
||||
|
||||
// Aggregate model usage
|
||||
for (model, count) in models_used {
|
||||
*self.models_used.entry(model).or_insert(0) += count;
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
fn into_event(self: Box<Self>) -> Value {
|
||||
let Self {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
time_spent,
|
||||
total_messages,
|
||||
total_streamed_requests,
|
||||
total_non_streamed_requests,
|
||||
models_used,
|
||||
..
|
||||
} = *self;
|
||||
|
||||
// Compute time statistics
|
||||
let time_spent: Vec<usize> = time_spent.into_sorted_vec();
|
||||
let (max_time, min_time, avg_time) = if time_spent.is_empty() {
|
||||
(0, 0, 0)
|
||||
} else {
|
||||
let max_time = time_spent.last().unwrap_or(&0);
|
||||
let min_time = time_spent.first().unwrap_or(&0);
|
||||
let sum: usize = time_spent.iter().sum();
|
||||
let avg_time = sum / time_spent.len();
|
||||
(*max_time, *min_time, avg_time)
|
||||
};
|
||||
|
||||
// Compute average messages per request
|
||||
let avg_messages_per_request =
|
||||
if total_received > 0 { total_messages as f64 / total_received as f64 } else { 0.0 };
|
||||
|
||||
// Compute streaming vs non-streaming proportions
|
||||
let streaming_ratio = if total_received > 0 {
|
||||
total_streamed_requests as f64 / total_received as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
json!({
|
||||
"total_received": total_received,
|
||||
"total_succeeded": total_succeeded,
|
||||
"time_spent": {
|
||||
"max": max_time,
|
||||
"min": min_time,
|
||||
"avg": avg_time
|
||||
},
|
||||
"total_messages": total_messages,
|
||||
"avg_messages_per_request": avg_messages_per_request,
|
||||
"total_streamed_requests": total_streamed_requests,
|
||||
"total_non_streamed_requests": total_non_streamed_requests,
|
||||
"streaming_ratio": streaming_ratio,
|
||||
"models_used": models_used,
|
||||
})
|
||||
}
|
||||
}
|
@ -13,9 +13,9 @@ use async_openai::types::{
|
||||
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
|
||||
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
|
||||
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
|
||||
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
|
||||
CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
|
||||
FunctionCallStream, FunctionObjectArgs,
|
||||
ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs,
|
||||
ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
|
||||
FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs,
|
||||
};
|
||||
use async_openai::Client;
|
||||
use bumpalo::Bump;
|
||||
@ -27,15 +27,17 @@ use meilisearch_types::features::{
|
||||
ChatCompletionPrompts as DbChatCompletionPrompts,
|
||||
ChatCompletionSource as DbChatCompletionSource, SystemRole,
|
||||
};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::index::ChatConfig;
|
||||
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
|
||||
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
|
||||
use meilisearch_types::{Document, Index};
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use tokio::runtime::Handle;
|
||||
use tokio::sync::mpsc::error::SendError;
|
||||
|
||||
use super::chat_completion_analytics::ChatCompletionAggregator;
|
||||
use super::config::Config;
|
||||
use super::errors::{MistralError, OpenAiOutsideError, StreamErrorEvent};
|
||||
use super::utils::format_documents;
|
||||
@ -43,10 +45,15 @@ use super::{
|
||||
ChatsParam, MEILI_APPEND_CONVERSATION_MESSAGE_NAME, MEILI_SEARCH_IN_INDEX_FUNCTION_NAME,
|
||||
MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME,
|
||||
};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::metrics::{
|
||||
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
|
||||
MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
|
||||
};
|
||||
use crate::routes::chats::utils::SseEventSender;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{add_search_rules, prepare_search, search_from_kind, SearchQuery};
|
||||
@ -64,6 +71,7 @@ async fn chat(
|
||||
req: HttpRequest,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
web::Json(chat_completion): web::Json<CreateChatCompletionRequest>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> impl Responder {
|
||||
let ChatsParam { workspace_uid } = chats_param.into_inner();
|
||||
|
||||
@ -76,6 +84,7 @@ async fn chat(
|
||||
&workspace_uid,
|
||||
req,
|
||||
chat_completion,
|
||||
analytics,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
@ -88,6 +97,7 @@ async fn chat(
|
||||
&workspace_uid,
|
||||
req,
|
||||
chat_completion,
|
||||
analytics,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
@ -160,6 +170,7 @@ fn setup_search_tool(
|
||||
|
||||
let mut index_uids = Vec::new();
|
||||
let mut function_description = prompts.search_description.clone();
|
||||
let mut filter_description = prompts.search_filter_param.clone();
|
||||
index_scheduler.try_for_each_index::<_, ()>(|name, index| {
|
||||
// Make sure to skip unauthorized indexes
|
||||
if !filters.is_index_authorized(name) {
|
||||
@ -171,16 +182,22 @@ fn setup_search_tool(
|
||||
let index_description = chat_config.description;
|
||||
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
|
||||
index_uids.push(name.to_string());
|
||||
let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
|
||||
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
|
||||
let _ = writeln!(&mut filter_description, "{facet_distributions}");
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
tracing::debug!("LLM function description: {function_description}");
|
||||
tracing::debug!("LLM filter description: {filter_description}");
|
||||
|
||||
let tool = ChatCompletionToolArgs::default()
|
||||
.r#type(ChatCompletionToolType::Function)
|
||||
.function(
|
||||
FunctionObjectArgs::default()
|
||||
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
|
||||
.description(&function_description)
|
||||
.description(function_description)
|
||||
.parameters(json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@ -194,9 +211,13 @@ fn setup_search_tool(
|
||||
// "type": ["string", "null"],
|
||||
"type": "string",
|
||||
"description": prompts.search_q_param,
|
||||
},
|
||||
"filter": {
|
||||
"type": "string",
|
||||
"description": filter_description,
|
||||
}
|
||||
},
|
||||
"required": ["index_uid", "q"],
|
||||
"required": ["index_uid", "q", "filter"],
|
||||
"additionalProperties": false,
|
||||
}))
|
||||
.strict(true)
|
||||
@ -238,11 +259,19 @@ async fn process_search_request(
|
||||
auth_token: &str,
|
||||
index_uid: String,
|
||||
q: Option<String>,
|
||||
filter: Option<String>,
|
||||
) -> Result<(Index, Vec<Document>, String), ResponseError> {
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.static_read_txn()?;
|
||||
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
|
||||
let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
|
||||
let mut query = SearchQuery {
|
||||
q,
|
||||
filter: filter.map(serde_json::Value::from),
|
||||
..SearchQuery::from(search_parameters)
|
||||
};
|
||||
|
||||
tracing::debug!("LLM query: {:?}", query);
|
||||
|
||||
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
|
||||
auth_ctrl,
|
||||
auth_token,
|
||||
@ -271,17 +300,26 @@ async fn process_search_request(
|
||||
let (search, _is_finite_pagination, _max_total_hits, _offset) =
|
||||
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
|
||||
|
||||
search_from_kind(index_uid, search_kind, search)
|
||||
.map(|(search_results, _)| (rtxn, search_results))
|
||||
.map_err(ResponseError::from)
|
||||
match search_from_kind(index_uid, search_kind, search) {
|
||||
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
|
||||
Err(MeilisearchHttpError::Milli {
|
||||
error: meilisearch_types::milli::Error::UserError(user_error),
|
||||
index_name: _,
|
||||
}) => Ok((rtxn, Err(user_error))),
|
||||
Err(err) => Err(ResponseError::from(err)),
|
||||
}
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
|
||||
let output = output?;
|
||||
let output = match output? {
|
||||
Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
|
||||
Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
|
||||
Err(err) => Err(err),
|
||||
};
|
||||
let mut documents = Vec::new();
|
||||
if let Ok((ref rtxn, ref search_result)) = output {
|
||||
// aggregate.succeed(search_result);
|
||||
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
|
||||
if search_result.degraded {
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
@ -315,9 +353,18 @@ async fn non_streamed_chat(
|
||||
workspace_uid: &str,
|
||||
req: HttpRequest,
|
||||
chat_completion: CreateChatCompletionRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
|
||||
|
||||
// Create analytics aggregator
|
||||
let aggregate = ChatCompletionAggregator::from_request(
|
||||
&chat_completion.model,
|
||||
chat_completion.messages.len(),
|
||||
false, // non_streamed_chat is not streaming
|
||||
);
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
if let Some(n) = chat_completion.n.filter(|&n| n != 1) {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("You tried to specify n = {n} but only single choices are supported (n = 1)."),
|
||||
@ -377,16 +424,19 @@ async fn non_streamed_chat(
|
||||
|
||||
for call in meili_calls {
|
||||
let result = match serde_json::from_str(&call.function.arguments) {
|
||||
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
|
||||
&index_scheduler,
|
||||
auth_ctrl.clone(),
|
||||
&search_queue,
|
||||
auth_token,
|
||||
index_uid,
|
||||
q,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| e.to_string()),
|
||||
Ok(SearchInIndexParameters { index_uid, q, filter }) => {
|
||||
process_search_request(
|
||||
&index_scheduler,
|
||||
auth_ctrl.clone(),
|
||||
&search_queue,
|
||||
auth_token,
|
||||
index_uid,
|
||||
q,
|
||||
filter,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| e.to_string())
|
||||
}
|
||||
Err(err) => Err(err.to_string()),
|
||||
};
|
||||
|
||||
@ -414,6 +464,11 @@ async fn non_streamed_chat(
|
||||
}
|
||||
}
|
||||
|
||||
// Record success in analytics
|
||||
let mut aggregate = aggregate;
|
||||
aggregate.succeed(start_time.elapsed());
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
||||
|
||||
@ -424,6 +479,7 @@ async fn streamed_chat(
|
||||
workspace_uid: &str,
|
||||
req: HttpRequest,
|
||||
mut chat_completion: CreateChatCompletionRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<impl Responder, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
|
||||
let filters = index_scheduler.filters();
|
||||
@ -445,6 +501,14 @@ async fn streamed_chat(
|
||||
}
|
||||
};
|
||||
|
||||
// Create analytics aggregator
|
||||
let mut aggregate = ChatCompletionAggregator::from_request(
|
||||
&chat_completion.model,
|
||||
chat_completion.messages.len(),
|
||||
true, // streamed_chat is always streaming
|
||||
);
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
let config = Config::new(&chat_settings);
|
||||
let auth_token = extract_token_from_request(&req)?.unwrap().to_string();
|
||||
let system_role = chat_settings.source.system_role(&chat_completion.model);
|
||||
@ -460,6 +524,7 @@ async fn streamed_chat(
|
||||
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(10);
|
||||
let tx = SseEventSender::new(tx);
|
||||
let workspace_uid = workspace_uid.to_string();
|
||||
let _join_handle = Handle::current().spawn(async move {
|
||||
let client = Client::with_config(config.clone());
|
||||
let mut global_tool_calls = HashMap::<u32, Call>::new();
|
||||
@ -469,6 +534,7 @@ async fn streamed_chat(
|
||||
let output = run_conversation(
|
||||
&index_scheduler,
|
||||
&auth_ctrl,
|
||||
&workspace_uid,
|
||||
&search_queue,
|
||||
&auth_token,
|
||||
&client,
|
||||
@ -490,6 +556,10 @@ async fn streamed_chat(
|
||||
let _ = tx.stop().await;
|
||||
});
|
||||
|
||||
// Record success in analytics after the stream is set up
|
||||
aggregate.succeed(start_time.elapsed());
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10)))
|
||||
}
|
||||
|
||||
@ -502,6 +572,7 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
auth_ctrl: &web::Data<AuthController>,
|
||||
workspace_uid: &str,
|
||||
search_queue: &web::Data<SearchQueue>,
|
||||
auth_token: &str,
|
||||
client: &Client<C>,
|
||||
@ -511,13 +582,34 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
global_tool_calls: &mut HashMap<u32, Call>,
|
||||
function_support: FunctionSupport,
|
||||
) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
|
||||
use DbChatCompletionSource::*;
|
||||
|
||||
let mut finish_reason = None;
|
||||
chat_completion.stream_options = match source {
|
||||
OpenAi | AzureOpenAi => Some(ChatCompletionStreamOptions { include_usage: true }),
|
||||
Mistral | VLlm => None,
|
||||
};
|
||||
|
||||
// safety: unwrap: can only happens if `stream` was set to `false`
|
||||
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
|
||||
while let Some(result) = response.next().await {
|
||||
match result {
|
||||
Ok(resp) => {
|
||||
let choice = &resp.choices[0];
|
||||
if let Some(usage) = resp.usage.as_ref() {
|
||||
MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.prompt_tokens as u64);
|
||||
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.completion_tokens as u64);
|
||||
MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.total_tokens as u64);
|
||||
}
|
||||
let choice = match resp.choices.first() {
|
||||
Some(choice) => choice,
|
||||
None => break,
|
||||
};
|
||||
finish_reason = choice.finish_reason;
|
||||
|
||||
let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
|
||||
@ -659,13 +751,14 @@ async fn handle_meili_tools(
|
||||
let mut error = None;
|
||||
|
||||
let result = match serde_json::from_str(&call.function.arguments) {
|
||||
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
|
||||
Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
|
||||
index_scheduler,
|
||||
auth_ctrl.clone(),
|
||||
search_queue,
|
||||
auth_token,
|
||||
index_uid,
|
||||
q,
|
||||
filter,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@ -741,4 +834,42 @@ struct SearchInIndexParameters {
|
||||
index_uid: String,
|
||||
/// The query parameter to use.
|
||||
q: Option<String>,
|
||||
/// The filter parameter to use.
|
||||
filter: Option<String>,
|
||||
}
|
||||
|
||||
fn format_facet_distributions(
|
||||
index: &Index,
|
||||
rtxn: &RoTxn,
|
||||
max_values_per_facet: usize,
|
||||
) -> meilisearch_types::milli::Result<String> {
|
||||
let universe = index.documents_ids(rtxn)?;
|
||||
let rules = index.filterable_attributes_rules(rtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let filterable_attributes = fields_ids_map
|
||||
.names()
|
||||
.filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match)))
|
||||
.map(|name| (name, OrderBy::Count));
|
||||
let facets_distribution = index
|
||||
.facets_distribution(rtxn)
|
||||
.max_values_per_facet(max_values_per_facet)
|
||||
.candidates(universe)
|
||||
.facets(filterable_attributes)
|
||||
.execute()?;
|
||||
|
||||
let mut output = String::new();
|
||||
for (facet_name, entries) in facets_distribution {
|
||||
let _ = write!(&mut output, "{}: ", facet_name);
|
||||
let total_entries = entries.len();
|
||||
for (i, (value, _count)) in entries.into_iter().enumerate() {
|
||||
let _ = if total_entries.saturating_sub(1) == i {
|
||||
write!(&mut output, "{value}.")
|
||||
} else {
|
||||
write!(&mut output, "{value}, ")
|
||||
};
|
||||
}
|
||||
let _ = writeln!(&mut output);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ impl Config {
|
||||
pub fn new(chat_settings: &DbChatSettings) -> Self {
|
||||
use meilisearch_types::features::ChatCompletionSource::*;
|
||||
match chat_settings.source {
|
||||
OpenAi | Mistral | Gemini | VLlm => {
|
||||
OpenAi | Mistral | VLlm => {
|
||||
let mut config = OpenAIConfig::default();
|
||||
if let Some(org_id) = chat_settings.org_id.as_ref() {
|
||||
config = config.with_org_id(org_id);
|
||||
|
@ -19,6 +19,7 @@ use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::PAGINATION_DEFAULT_LIMIT;
|
||||
|
||||
mod chat_completion_analytics;
|
||||
pub mod chat_completions;
|
||||
mod config;
|
||||
mod errors;
|
||||
|
@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
|
||||
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
|
||||
DEFAULT_CHAT_SYSTEM_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
|
||||
};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
@ -84,6 +84,11 @@ async fn patch_settings(
|
||||
Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
|
||||
Setting::NotSet => old_settings.prompts.search_q_param,
|
||||
},
|
||||
search_filter_param: match new_prompts.search_filter_param {
|
||||
Setting::Set(new_description) => new_description,
|
||||
Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
|
||||
Setting::NotSet => old_settings.prompts.search_filter_param,
|
||||
},
|
||||
search_index_uid_param: match new_prompts.search_index_uid_param {
|
||||
Setting::Set(new_description) => new_description,
|
||||
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
|
||||
@ -218,7 +223,6 @@ pub enum ChatCompletionSource {
|
||||
#[default]
|
||||
OpenAi,
|
||||
Mistral,
|
||||
Gemini,
|
||||
AzureOpenAi,
|
||||
VLlm,
|
||||
}
|
||||
@ -229,7 +233,6 @@ impl From<ChatCompletionSource> for DbChatCompletionSource {
|
||||
match source {
|
||||
OpenAi => DbChatCompletionSource::OpenAi,
|
||||
Mistral => DbChatCompletionSource::Mistral,
|
||||
Gemini => DbChatCompletionSource::Gemini,
|
||||
AzureOpenAi => DbChatCompletionSource::AzureOpenAi,
|
||||
VLlm => DbChatCompletionSource::VLlm,
|
||||
}
|
||||
@ -254,6 +257,10 @@ pub struct ChatPrompts {
|
||||
#[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
|
||||
pub search_q_param: Setting<String>,
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
|
||||
#[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
|
||||
pub search_filter_param: Setting<String>,
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
|
||||
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
|
||||
pub search_index_uid_param: Setting<String>,
|
||||
|
183
crates/meilisearch/src/routes/export.rs
Normal file
183
crates/meilisearch/src/routes/export.rs
Normal file
@ -0,0 +1,183 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::convert::Infallible;
|
||||
use std::str::FromStr as _;
|
||||
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use byte_unit::Byte;
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::export_analytics::ExportAnalytics;
|
||||
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
|
||||
use crate::Opt;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(export),
|
||||
tags((
|
||||
name = "Export",
|
||||
description = "The `/export` route allows you to trigger an export process to a remote Meilisearch instance.",
|
||||
external_docs(url = "https://www.meilisearch.com/docs/reference/api/export"),
|
||||
)),
|
||||
)]
|
||||
pub struct ExportApi;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(export)));
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "",
|
||||
tag = "Export",
|
||||
security(("Bearer" = ["export", "*"])),
|
||||
responses(
|
||||
(status = 202, description = "Export successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
|
||||
{
|
||||
"taskUid": 1,
|
||||
"status": "enqueued",
|
||||
"type": "export",
|
||||
"enqueuedAt": "2021-08-11T09:25:53.000000Z"
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
async fn export(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::EXPORT }>, Data<IndexScheduler>>,
|
||||
export: AwebJson<Export, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let export = export.into_inner();
|
||||
debug!(returns = ?export, "Trigger export");
|
||||
|
||||
let analytics_aggregate = ExportAnalytics::from_export(&export);
|
||||
|
||||
let Export { url, api_key, payload_size, indexes } = export;
|
||||
|
||||
let indexes = match indexes {
|
||||
Some(indexes) => indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, ExportIndexSettings { filter, override_settings })| {
|
||||
(pattern, DbExportIndexSettings { filter, override_settings })
|
||||
})
|
||||
.collect(),
|
||||
None => BTreeMap::from([(
|
||||
IndexUidPattern::new_unchecked("*"),
|
||||
DbExportIndexSettings::default(),
|
||||
)]),
|
||||
};
|
||||
|
||||
let task = KindWithContent::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
|
||||
indexes,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
analytics.publish(analytics_aggregate, &req);
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Export {
|
||||
#[schema(value_type = Option<String>, example = json!("https://ms-1234.heaven.meilisearch.com"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportUrl>)]
|
||||
pub url: String,
|
||||
#[schema(value_type = Option<String>, example = json!("1234abcd"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportApiKey>)]
|
||||
pub api_key: Option<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("24MiB"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportPayloadSize>)]
|
||||
pub payload_size: Option<ByteWithDeserr>,
|
||||
#[schema(value_type = Option<BTreeMap<String, ExportIndexSettings>>, example = json!({ "*": { "filter": null } }))]
|
||||
#[deserr(default)]
|
||||
#[serde(default)]
|
||||
pub indexes: Option<BTreeMap<IndexUidPattern, ExportIndexSettings>>,
|
||||
}
|
||||
|
||||
/// A wrapper around the `Byte` type that implements `Deserr`.
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct ByteWithDeserr(pub Byte);
|
||||
|
||||
impl<E> deserr::Deserr<E> for ByteWithDeserr
|
||||
where
|
||||
E: deserr::DeserializeError,
|
||||
{
|
||||
fn deserialize_from_value<V: deserr::IntoValue>(
|
||||
value: deserr::Value<V>,
|
||||
location: deserr::ValuePointerRef,
|
||||
) -> Result<Self, E> {
|
||||
use deserr::{ErrorKind, Value, ValueKind};
|
||||
match value {
|
||||
Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
|
||||
Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
|
||||
deserr::take_cf_content(E::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected { msg: e.to_string() },
|
||||
location,
|
||||
))
|
||||
}),
|
||||
actual => Err(deserr::take_cf_content(E::error(
|
||||
None,
|
||||
ErrorKind::IncorrectValueKind {
|
||||
actual,
|
||||
accepted: &[ValueKind::Integer, ValueKind::String],
|
||||
},
|
||||
location,
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct ExportIndexSettings {
|
||||
#[schema(value_type = Option<String>, example = json!("genres = action"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[schema(value_type = Option<bool>, example = json!(true))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportIndexOverrideSettings>)]
|
||||
pub override_settings: bool,
|
||||
}
|
111
crates/meilisearch/src/routes/export_analytics.rs
Normal file
111
crates/meilisearch/src/routes/export_analytics.rs
Normal file
@ -0,0 +1,111 @@
|
||||
use url::Url;
|
||||
|
||||
use crate::analytics::Aggregate;
|
||||
use crate::routes::export::Export;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ExportAnalytics {
|
||||
total_received: usize,
|
||||
has_api_key: bool,
|
||||
sum_exports_meilisearch_cloud: usize,
|
||||
sum_index_patterns: usize,
|
||||
sum_patterns_with_filter: usize,
|
||||
sum_patterns_with_override_settings: usize,
|
||||
payload_sizes: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ExportAnalytics {
|
||||
pub fn from_export(export: &Export) -> Self {
|
||||
let Export { url, api_key, payload_size, indexes } = export;
|
||||
|
||||
let url = Url::parse(url).ok();
|
||||
let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
|
||||
host.ends_with("meilisearch.dev")
|
||||
|| host.ends_with("meilisearch.com")
|
||||
|| host.ends_with("meilisearch.io")
|
||||
});
|
||||
let has_api_key = api_key.is_some();
|
||||
let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
|
||||
let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
|
||||
indexes.values().filter(|settings| settings.filter.is_some()).count()
|
||||
});
|
||||
let patterns_with_override_settings_count = indexes.as_ref().map_or(0, |indexes| {
|
||||
indexes.values().filter(|settings| settings.override_settings).count()
|
||||
});
|
||||
let payload_sizes =
|
||||
if let Some(crate::routes::export::ByteWithDeserr(byte_size)) = payload_size {
|
||||
vec![byte_size.as_u64()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Self {
|
||||
total_received: 1,
|
||||
has_api_key,
|
||||
sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
|
||||
sum_index_patterns: index_patterns_count,
|
||||
sum_patterns_with_filter: patterns_with_filter_count,
|
||||
sum_patterns_with_override_settings: patterns_with_override_settings_count,
|
||||
payload_sizes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Aggregate for ExportAnalytics {
|
||||
fn event_name(&self) -> &'static str {
|
||||
"Export Triggered"
|
||||
}
|
||||
|
||||
fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
|
||||
self.total_received += other.total_received;
|
||||
self.has_api_key |= other.has_api_key;
|
||||
self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
|
||||
self.sum_index_patterns += other.sum_index_patterns;
|
||||
self.sum_patterns_with_filter += other.sum_patterns_with_filter;
|
||||
self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
|
||||
self.payload_sizes.extend(other.payload_sizes);
|
||||
self
|
||||
}
|
||||
|
||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||
let avg_payload_size = if self.payload_sizes.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
|
||||
};
|
||||
|
||||
let avg_exports_meilisearch_cloud = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_exports_meilisearch_cloud as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
let avg_index_patterns = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_index_patterns as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
let avg_patterns_with_filter = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_patterns_with_filter as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
let avg_patterns_with_override_settings = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_patterns_with_override_settings as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"total_received": self.total_received,
|
||||
"has_api_key": self.has_api_key,
|
||||
"avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
|
||||
"avg_index_patterns": avg_index_patterns,
|
||||
"avg_patterns_with_filter": avg_patterns_with_filter,
|
||||
"avg_patterns_with_override_settings": avg_patterns_with_override_settings,
|
||||
"avg_payload_size": avg_payload_size,
|
||||
})
|
||||
}
|
||||
}
|
@ -54,6 +54,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -100,6 +101,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub composite_embedders: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub chat_completions: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub multimodal: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@ -113,6 +116,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@ -124,6 +128,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
get_task_documents_route: Some(get_task_documents_route),
|
||||
composite_embedders: Some(composite_embedders),
|
||||
chat_completions: Some(chat_completions),
|
||||
multimodal: Some(multimodal),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -138,6 +143,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: bool,
|
||||
composite_embedders: bool,
|
||||
chat_completions: bool,
|
||||
multimodal: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@ -155,6 +161,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: new.get_task_documents_route,
|
||||
composite_embedders: new.composite_embedders,
|
||||
chat_completions: new.chat_completions,
|
||||
multimodal: new.multimodal,
|
||||
})
|
||||
}
|
||||
|
||||
@ -181,6 +188,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -223,6 +231,7 @@ async fn patch_features(
|
||||
.composite_embedders
|
||||
.unwrap_or(old_features.composite_embedders),
|
||||
chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
|
||||
multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@ -237,6 +246,7 @@ async fn patch_features(
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@ -249,6 +259,7 @@ async fn patch_features(
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
use std::io::{ErrorKind, Seek as _};
|
||||
use std::marker::PhantomData;
|
||||
use std::str::FromStr;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
@ -17,9 +18,10 @@ use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::documents::sort::recursive_sort;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::milli::{AscDesc, DocumentId};
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use meilisearch_types::star_or::OptionStarOrList;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
@ -42,6 +44,7 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::fix_sort_query_parameters;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
};
|
||||
@ -135,6 +138,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
|
||||
per_document_id: bool,
|
||||
// if a filter was used
|
||||
per_filter: bool,
|
||||
// if documents were sorted
|
||||
sort: bool,
|
||||
|
||||
#[serde(rename = "vector.retrieve_vectors")]
|
||||
retrieve_vectors: bool,
|
||||
@ -151,39 +156,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
|
||||
marker: std::marker::PhantomData<Method>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum DocumentFetchKind {
|
||||
PerDocumentId { retrieve_vectors: bool },
|
||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
|
||||
}
|
||||
|
||||
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
|
||||
pub fn from_query(query: &DocumentFetchKind) -> Self {
|
||||
let (limit, offset, retrieve_vectors) = match query {
|
||||
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
|
||||
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
|
||||
(*limit, *offset, *retrieve_vectors)
|
||||
}
|
||||
};
|
||||
|
||||
let ids = match query {
|
||||
DocumentFetchKind::Normal { ids, .. } => *ids,
|
||||
DocumentFetchKind::PerDocumentId { .. } => 0,
|
||||
};
|
||||
|
||||
Self {
|
||||
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
|
||||
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
|
||||
max_limit: limit,
|
||||
max_offset: offset,
|
||||
retrieve_vectors,
|
||||
max_document_ids: ids,
|
||||
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
|
||||
fn event_name(&self) -> &'static str {
|
||||
Method::event_name()
|
||||
@ -193,6 +165,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
|
||||
Box::new(Self {
|
||||
per_document_id: self.per_document_id | new.per_document_id,
|
||||
per_filter: self.per_filter | new.per_filter,
|
||||
sort: self.sort | new.sort,
|
||||
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
|
||||
max_limit: self.max_limit.max(new.max_limit),
|
||||
max_offset: self.max_offset.max(new.max_offset),
|
||||
@ -276,6 +249,7 @@ pub async fn get_document(
|
||||
retrieve_vectors: param_retrieve_vectors.0,
|
||||
per_document_id: true,
|
||||
per_filter: false,
|
||||
sort: false,
|
||||
max_limit: 0,
|
||||
max_offset: 0,
|
||||
max_document_ids: 0,
|
||||
@ -406,6 +380,8 @@ pub struct BrowseQueryGet {
|
||||
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||
filter: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
|
||||
sort: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema)]
|
||||
@ -430,6 +406,9 @@ pub struct BrowseQuery {
|
||||
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
filter: Option<Value>,
|
||||
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
|
||||
sort: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Get documents with POST
|
||||
@ -495,6 +474,7 @@ pub async fn documents_by_query_post(
|
||||
analytics.publish(
|
||||
DocumentsFetchAggregator::<DocumentsPOST> {
|
||||
per_filter: body.filter.is_some(),
|
||||
sort: body.sort.is_some(),
|
||||
retrieve_vectors: body.retrieve_vectors,
|
||||
max_limit: body.limit,
|
||||
max_offset: body.offset,
|
||||
@ -571,7 +551,7 @@ pub async fn get_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
|
||||
params.into_inner();
|
||||
|
||||
let filter = match filter {
|
||||
@ -582,20 +562,20 @@ pub async fn get_documents(
|
||||
None => None,
|
||||
};
|
||||
|
||||
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
|
||||
|
||||
let query = BrowseQuery {
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
fields: fields.merge_star_and_none(),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
filter,
|
||||
ids,
|
||||
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
|
||||
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
|
||||
};
|
||||
|
||||
analytics.publish(
|
||||
DocumentsFetchAggregator::<DocumentsGET> {
|
||||
per_filter: query.filter.is_some(),
|
||||
sort: query.sort.is_some(),
|
||||
retrieve_vectors: query.retrieve_vectors,
|
||||
max_limit: query.limit,
|
||||
max_offset: query.offset,
|
||||
@ -615,7 +595,7 @@ fn documents_by_query(
|
||||
query: BrowseQuery,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
|
||||
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
|
||||
|
||||
@ -633,6 +613,18 @@ fn documents_by_query(
|
||||
None
|
||||
};
|
||||
|
||||
let sort_criteria = if let Some(sort) = &sort {
|
||||
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(asc_desc_error) => {
|
||||
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
|
||||
}
|
||||
};
|
||||
Some(sorts)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) = retrieve_documents(
|
||||
&index,
|
||||
@ -643,6 +635,7 @@ fn documents_by_query(
|
||||
fields,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
sort_criteria,
|
||||
)?;
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
@ -1452,7 +1445,6 @@ fn some_documents<'a, 't: 'a>(
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||
@ -1468,15 +1460,9 @@ fn some_documents<'a, 't: 'a>(
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(vector.into()),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
@ -1501,6 +1487,7 @@ fn retrieve_documents<S: AsRef<str>>(
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filter = &filter;
|
||||
@ -1533,15 +1520,32 @@ fn retrieve_documents<S: AsRef<str>>(
|
||||
})?
|
||||
}
|
||||
|
||||
let (it, number_of_documents) = {
|
||||
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
|
||||
let number_of_documents = candidates.len();
|
||||
let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
|
||||
let iter = facet_sort.iter()?;
|
||||
let mut documents = Vec::with_capacity(limit);
|
||||
for result in iter.skip(offset).take(limit) {
|
||||
documents.push(result?);
|
||||
}
|
||||
(
|
||||
itertools::Either::Left(some_documents(
|
||||
index,
|
||||
&rtxn,
|
||||
documents.into_iter(),
|
||||
retrieve_vectors,
|
||||
)?),
|
||||
number_of_documents,
|
||||
)
|
||||
} else {
|
||||
let number_of_documents = candidates.len();
|
||||
(
|
||||
some_documents(
|
||||
itertools::Either::Right(some_documents(
|
||||
index,
|
||||
&rtxn,
|
||||
candidates.into_iter().skip(offset).take(limit),
|
||||
retrieve_vectors,
|
||||
)?,
|
||||
)?),
|
||||
number_of_documents,
|
||||
)
|
||||
};
|
||||
|
@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
|
||||
pub media: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
@ -94,6 +96,7 @@ impl FacetSearchAggregator {
|
||||
facet_name,
|
||||
vector,
|
||||
q,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
@ -108,6 +111,7 @@ impl FacetSearchAggregator {
|
||||
facet_names: Some(facet_name.clone()).into_iter().collect(),
|
||||
additional_search_parameters_provided: q.is_some()
|
||||
|| vector.is_some()
|
||||
|| media.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some()
|
||||
@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
facet_name: _,
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
|
||||
SearchQuery {
|
||||
q,
|
||||
media,
|
||||
offset: DEFAULT_SEARCH_OFFSET(),
|
||||
limit: DEFAULT_SEARCH_LIMIT(),
|
||||
page,
|
||||
|
@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
media: None,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
offset: other.offset.0,
|
||||
limit: other.limit.0,
|
||||
@ -481,28 +483,30 @@ pub fn search_kind(
|
||||
index_uid: String,
|
||||
index: &milli::Index,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
let is_placeholder_query =
|
||||
if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
|
||||
let non_placeholder_query = !is_placeholder_query;
|
||||
let is_media = query.media.is_some();
|
||||
// handle with care, the order of cases matters, the semantics is subtle
|
||||
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
||||
// empty query, no vector => placeholder search
|
||||
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
||||
// no query, no vector => placeholder search
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
|
||||
// media + vector => error
|
||||
(true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
|
||||
// media + !hybrid => error
|
||||
(true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// vector + !hybrid => error
|
||||
(_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// hybrid S0 => keyword
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
Ok(SearchKind::KeywordOnly)
|
||||
}
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
|
||||
// !q + !vector => placeholder search
|
||||
(false, false, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid S100 => semantic
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
// q + hybrid => hybrid
|
||||
(_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
index,
|
||||
@ -510,7 +514,11 @@ pub fn search_kind(
|
||||
**semantic_ratio,
|
||||
v.map(|v| v.len()),
|
||||
),
|
||||
|
||||
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// !q + hybrid => semantic
|
||||
(_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// q => keyword
|
||||
(false, true, None, None) => Ok(SearchKind::KeywordOnly),
|
||||
}
|
||||
}
|
||||
|
@ -61,6 +61,8 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
||||
semantic_ratio: bool,
|
||||
hybrid: bool,
|
||||
retrieve_vectors: bool,
|
||||
// Number of requests containing `media`
|
||||
total_media: usize,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
@ -101,6 +103,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
let SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
offset,
|
||||
limit,
|
||||
page,
|
||||
@ -175,6 +178,11 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
if let Some(ref vector) = vector {
|
||||
ret.max_vector_size = vector.len();
|
||||
}
|
||||
|
||||
if media.is_some() {
|
||||
ret.total_media = 1;
|
||||
}
|
||||
|
||||
ret.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
if query.is_finite_pagination() {
|
||||
@ -277,6 +285,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
@ -327,6 +336,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.total_media += total_media;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@ -403,6 +413,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
@ -450,6 +461,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"total_media": total_media,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user