mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-08 21:55:42 +00:00
Compare commits
449 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3b95007292 | ||
|
|
4a2b8be428 | ||
|
|
bea11a1353 | ||
|
|
5a7948bfab | ||
|
|
ef4c87accf | ||
|
|
ced7ea4a5c | ||
|
|
fef089c7b6 | ||
|
|
d47e1e15de | ||
|
|
a76a3e8f11 | ||
|
|
32dede35c7 | ||
|
|
6397ef12a0 | ||
|
|
b5e41f0e46 | ||
|
|
9f0d33ec99 | ||
|
|
90e6b6416f | ||
|
|
2b75072b09 | ||
|
|
6e6fd077d4 | ||
|
|
a051ab3d9a | ||
|
|
6b94033c97 | ||
|
|
dfe0c8664e | ||
|
|
0ca652de28 | ||
|
|
87f105747f | ||
|
|
735634e998 | ||
|
|
3740755d9c | ||
|
|
bbcabc47bd | ||
|
|
a06cb1bfd6 | ||
|
|
549dc985b8 | ||
|
|
428463e45c | ||
|
|
7113fcf63a | ||
|
|
aa6855cd4f | ||
|
|
895db76a51 | ||
|
|
a88146d59e | ||
|
|
91e77abf4f | ||
|
|
82a796aea7 | ||
|
|
f6287602e9 | ||
|
|
ede456c5b0 | ||
|
|
3f5b5df139 | ||
|
|
d72e5f5f69 | ||
|
|
aa366d593d | ||
|
|
205430854d | ||
|
|
be64006211 | ||
|
|
eda309d562 | ||
|
|
119d618a76 | ||
|
|
2b2e6c0b3a | ||
|
|
e6329e77e1 | ||
|
|
b086c51a23 | ||
|
|
9ce5598fef | ||
|
|
e30c24b5bf | ||
|
|
c1a132fa06 | ||
|
|
e54fc59248 | ||
|
|
11e7c0d75f | ||
|
|
c593fbe648 | ||
|
|
2b3327ea74 | ||
|
|
d14184f4da | ||
|
|
46bceb91f1 | ||
|
|
cab5e35ff7 | ||
|
|
f8232976ed | ||
|
|
22d363c05a | ||
|
|
41620d5325 | ||
|
|
f3d5c74c02 | ||
|
|
d48baece51 | ||
|
|
c45ede44a8 | ||
|
|
4235a82dcf | ||
|
|
e7b9b8f002 | ||
|
|
5716ab70f3 | ||
|
|
422a786ffd | ||
|
|
836ae19bec | ||
|
|
0b5bc41b79 | ||
|
|
b45059e8f2 | ||
|
|
c16c60b599 | ||
|
|
0114796d2a | ||
|
|
17a94c40dc | ||
|
|
76ca44b214 | ||
|
|
d2e4d6dd8a | ||
|
|
879cf85037 | ||
|
|
c2d5b20a42 | ||
|
|
b93ca3945e | ||
|
|
8fef48f8ca | ||
|
|
d2776efb11 | ||
|
|
9211e94c4f | ||
|
|
b7bebe9bbb | ||
|
|
37a692f942 | ||
|
|
25c19a306b | ||
|
|
c078efd730 | ||
|
|
9dac91efe0 | ||
|
|
074d509d92 | ||
|
|
d439a3cb9d | ||
|
|
259fc067d3 | ||
|
|
e8b2bb3ea6 | ||
|
|
7dfb2071b5 | ||
|
|
9cfbef478e | ||
|
|
efd5fd96cc | ||
|
|
0ef52941c7 | ||
|
|
0d85f8fcee | ||
|
|
f4bb6cbca8 | ||
|
|
ad03c86c44 | ||
|
|
85037352b9 | ||
|
|
1b54c866e1 | ||
|
|
e414284335 | ||
|
|
7a204609fe | ||
|
|
6b2b8ed676 | ||
|
|
6db5939f84 | ||
|
|
d35b2d8d33 | ||
|
|
0687cf058a | ||
|
|
7219299436 | ||
|
|
657bbf5d1e | ||
|
|
7fa1c41190 | ||
|
|
77802dabf6 | ||
|
|
a685eeafeb | ||
|
|
f16e6f7c37 | ||
|
|
900be0ccad | ||
|
|
51a087b764 | ||
|
|
31142b3663 | ||
|
|
e60b855a54 | ||
|
|
510a4b91be | ||
|
|
e704f4d1ec | ||
|
|
82fe80b360 | ||
|
|
0f1dd3614c | ||
|
|
3aa6c3c750 | ||
|
|
b956918c11 | ||
|
|
e3003c1609 | ||
|
|
bf13268649 | ||
|
|
0bb7866f1e | ||
|
|
e6e9a033aa | ||
|
|
63031219c5 | ||
|
|
44d6430bae | ||
|
|
4d26e9c6f2 | ||
|
|
2ff382c023 | ||
|
|
0f6dd133b2 | ||
|
|
29f6eeff8f | ||
|
|
ef007d547d | ||
|
|
3fc16c627d | ||
|
|
9422b6d654 | ||
|
|
ddba52414a | ||
|
|
a743da3061 | ||
|
|
c6216517c7 | ||
|
|
2d4f7c635e | ||
|
|
ee812b31c4 | ||
|
|
3329248a84 | ||
|
|
bc08cd0deb | ||
|
|
3e2f468213 | ||
|
|
7c448bcc00 | ||
|
|
acb7c0a449 | ||
|
|
e8795d2608 | ||
|
|
e023ee4b6b | ||
|
|
e74c3b692a | ||
|
|
1d3b18f774 | ||
|
|
00bc86e74b | ||
|
|
adc9976615 | ||
|
|
ae8c1461e1 | ||
|
|
5f62274f21 | ||
|
|
5f50fc9464 | ||
|
|
89498a2bea | ||
|
|
211c1b753f | ||
|
|
d08e89ea3d | ||
|
|
695877043a | ||
|
|
bc4d1530ee | ||
|
|
d7721fe607 | ||
|
|
4a179fb3c0 | ||
|
|
59a1c5d9a7 | ||
|
|
2f82d94502 | ||
|
|
bd2bd0f33b | ||
|
|
e02733df4a | ||
|
|
f373ecc96a | ||
|
|
748a327271 | ||
|
|
4925b30196 | ||
|
|
43c4a229b7 | ||
|
|
ca112a8b95 | ||
|
|
855fa555a3 | ||
|
|
a237c0797a | ||
|
|
5c46dc702a | ||
|
|
4cadc8113b | ||
|
|
c17031d3de | ||
|
|
fc6cc80705 | ||
|
|
138d20b277 | ||
|
|
7c1a9113f9 | ||
|
|
07ae297ffd | ||
|
|
4069dbcfca | ||
|
|
03eb50fbac | ||
|
|
2616d776f2 | ||
|
|
3004db95af | ||
|
|
9a729bf31d | ||
|
|
8bfa6a7f54 | ||
|
|
056f18bd02 | ||
|
|
fe9866aca8 | ||
|
|
60f105a4a3 | ||
|
|
abb399b802 | ||
|
|
aeaac7270e | ||
|
|
f45770a3ce | ||
|
|
0e10ff1aa3 | ||
|
|
6ee608c2d1 | ||
|
|
95e8a9bef1 | ||
|
|
0598320252 | ||
|
|
2269104337 | ||
|
|
6b4d69996c | ||
|
|
df4e3c2e43 | ||
|
|
e2b549c5ee | ||
|
|
8390006ebf | ||
|
|
7200437246 | ||
|
|
68e7bfb37f | ||
|
|
209c4bfc18 | ||
|
|
396d76046d | ||
|
|
9ae73e3c05 | ||
|
|
933e319364 | ||
|
|
596617dd31 | ||
|
|
f3dd6834c6 | ||
|
|
e8774ad079 | ||
|
|
5d191c479e | ||
|
|
c3368e6859 | ||
|
|
40776ed4cd | ||
|
|
9bda9a9a64 | ||
|
|
aefebdeb8b | ||
|
|
646e44ddf9 | ||
|
|
9275ce1503 | ||
|
|
48d2d3a5cd | ||
|
|
7ec0c9aa83 | ||
|
|
484fdd9ce2 | ||
|
|
7533a11143 | ||
|
|
19d077a4b1 | ||
|
|
b8845d1015 | ||
|
|
620867d611 | ||
|
|
77cc3678b5 | ||
|
|
a73d3c03e9 | ||
|
|
824f5b12ce | ||
|
|
bb4baf7fae | ||
|
|
0263eb0aec | ||
|
|
8a916a4e42 | ||
|
|
506ee40dc5 | ||
|
|
952fabf8a0 | ||
|
|
7ea2e4ec7b | ||
|
|
a0a4ac66ec | ||
|
|
b037e416d3 | ||
|
|
e9d547556d | ||
|
|
ab0eba2f72 | ||
|
|
5ceb3c6a10 | ||
|
|
34d572e3e5 | ||
|
|
28e6adc435 | ||
|
|
6a683975bf | ||
|
|
c60d11fb42 | ||
|
|
32207f9f19 | ||
|
|
7c1b15fd06 | ||
|
|
4352a924d7 | ||
|
|
bbe802c656 | ||
|
|
b32e30ad27 | ||
|
|
ae115cee78 | ||
|
|
1824fbd1b5 | ||
|
|
34d8a54c4b | ||
|
|
8fa6e8670a | ||
|
|
c640856cc1 | ||
|
|
1a1317ab0f | ||
|
|
9cab754942 | ||
|
|
4a0ec15ad2 | ||
|
|
985b892b7a | ||
|
|
605dea4f85 | ||
|
|
95d4775d4a | ||
|
|
416fcf47f1 | ||
|
|
6433e49882 | ||
|
|
85939ae8ad | ||
|
|
e654eddf56 | ||
|
|
170ad87e44 | ||
|
|
bc56087a17 | ||
|
|
29d82ade56 | ||
|
|
a7f5d3bb7a | ||
|
|
48e8356a16 | ||
|
|
1fda05c2fd | ||
|
|
8f96724adf | ||
|
|
01e5b0effa | ||
|
|
2ec9664878 | ||
|
|
7f5a0c0013 | ||
|
|
f5c3dad3ed | ||
|
|
10028515ac | ||
|
|
63ccd19ab1 | ||
|
|
1b4d344e18 | ||
|
|
89c0cf9b12 | ||
|
|
3770e70581 | ||
|
|
e497008161 | ||
|
|
a15ebb283f | ||
|
|
3f256a7959 | ||
|
|
b41af0d0f6 | ||
|
|
3ebff65ef3 | ||
|
|
62e2a5a324 | ||
|
|
90d96ee415 | ||
|
|
38b317857d | ||
|
|
765e76857f | ||
|
|
204cf423b2 | ||
|
|
e575b5af74 | ||
|
|
4fc24cb691 | ||
|
|
8bc8484e95 | ||
|
|
7b49c30d8c | ||
|
|
239851046d | ||
|
|
60796dfb14 | ||
|
|
c7cb72a77a | ||
|
|
4d819ea636 | ||
|
|
666680bd87 | ||
|
|
27527849bb | ||
|
|
1d02efeab9 | ||
|
|
53fc98d3b0 | ||
|
|
263300b3a3 | ||
|
|
ab3d92d163 | ||
|
|
ef9fc6c854 | ||
|
|
61b0f50d4d | ||
|
|
0557a4dd2f | ||
|
|
930d5a09a8 | ||
|
|
8b0c4291ae | ||
|
|
c9efdf8c88 | ||
|
|
72736c0ea9 | ||
|
|
49317bbee4 | ||
|
|
af54c8381e | ||
|
|
693fcd5752 | ||
|
|
733175359a | ||
|
|
7c6162f0bf | ||
|
|
d6ae39bf0f | ||
|
|
e416bbc1de | ||
|
|
2cfd363dc6 | ||
|
|
70aa78a2c2 | ||
|
|
96c81762ed | ||
|
|
0b1f634afa | ||
|
|
d3d5015854 | ||
|
|
f95f29c492 | ||
|
|
a50b69b868 | ||
|
|
3668f5f021 | ||
|
|
54fdf379bb | ||
|
|
41b1cd5a73 | ||
|
|
5c14a25d5a | ||
|
|
fda2843135 | ||
|
|
9347330f3a | ||
|
|
56c9190dab | ||
|
|
6b986dceaf | ||
|
|
ea6bb4df1d | ||
|
|
a3d2f64725 | ||
|
|
d5526cffff | ||
|
|
5cb75d1f2a | ||
|
|
921e3c4ffe | ||
|
|
52591761af | ||
|
|
f80182f0a9 | ||
|
|
3b30b6a57a | ||
|
|
5efc78db55 | ||
|
|
cffbe3fcb6 | ||
|
|
8d8fcb9846 | ||
|
|
20049669c9 | ||
|
|
db28d13cb1 | ||
|
|
5a7cfc57fd | ||
|
|
790621dc29 | ||
|
|
1d577ae98b | ||
|
|
88e9a55d44 | ||
|
|
dbe551cf99 | ||
|
|
a299fbd33b | ||
|
|
193119acb9 | ||
|
|
4c71118699 | ||
|
|
5fe2943d3c | ||
|
|
86ff502327 | ||
|
|
6b1a345dce | ||
|
|
b54ece690b | ||
|
|
3ea167bade | ||
|
|
1158d6689f | ||
|
|
d9b0463a0b | ||
|
|
ae9899f179 | ||
|
|
308fd7128e | ||
|
|
27e7c00622 | ||
|
|
58207da934 | ||
|
|
fb8b832192 | ||
|
|
17207b5405 | ||
|
|
bd95503eba | ||
|
|
8b8b0d802c | ||
|
|
d329e86250 | ||
|
|
d416b3b390 | ||
|
|
54f5e74744 | ||
|
|
fd4b192a39 | ||
|
|
3c13feebf7 | ||
|
|
1811168b96 | ||
|
|
b06cc1e0a2 | ||
|
|
44f812c36d | ||
|
|
c8e77b5f25 | ||
|
|
283f516e15 | ||
|
|
b4ca0a8c98 | ||
|
|
b658e38acd | ||
|
|
f87e46cc16 | ||
|
|
65354b414a | ||
|
|
025df397c0 | ||
|
|
f77abc9dc8 | ||
|
|
7e9909ee45 | ||
|
|
43ec97fe45 | ||
|
|
02929e241b | ||
|
|
c13efde042 | ||
|
|
36f0a1492c | ||
|
|
ce65ad213b | ||
|
|
3e0de6cb83 | ||
|
|
f3d691667d | ||
|
|
ce9c930d10 | ||
|
|
fc88b003b4 | ||
|
|
cf5d26124a | ||
|
|
38b1c57fa8 | ||
|
|
25c525b057 | ||
|
|
83cd28b60b | ||
|
|
48cad4132a | ||
|
|
4897ad99d0 | ||
|
|
46ff78b4ec | ||
|
|
9ad43b6841 | ||
|
|
c9ec502ed9 | ||
|
|
18aed75d3b | ||
|
|
6738a4f6ee | ||
|
|
d2948adea3 | ||
|
|
f54b57e5be | ||
|
|
95821d0bde | ||
|
|
f690fa0686 | ||
|
|
24e94b28c1 | ||
|
|
34d58f35c8 | ||
|
|
1d5265caf4 | ||
|
|
97aeb6db4d | ||
|
|
f888f87635 | ||
|
|
8c8d98eeaa | ||
|
|
c5ae43cac6 | ||
|
|
57eecd6197 | ||
|
|
2fe5c78cb6 | ||
|
|
8047cfe438 | ||
|
|
5717e5c1af | ||
|
|
bb07038c31 | ||
|
|
d1a088ea0b | ||
|
|
b68e22c0e6 | ||
|
|
03a36f116e | ||
|
|
8a0bf24ed5 | ||
|
|
e2763471e5 | ||
|
|
b2f2c5d69f | ||
|
|
1594c54e23 | ||
|
|
13b607bd68 | ||
|
|
3d130d31c8 | ||
|
|
4cda584b0c | ||
|
|
248c90bad5 | ||
|
|
0e9040e605 | ||
|
|
3e3c00f44c | ||
|
|
d986a3bbaf | ||
|
|
c2ceb8e41b | ||
|
|
79db2e67fb | ||
|
|
865f24cfef | ||
|
|
3fbe1df770 | ||
|
|
150d1db86b | ||
|
|
806e983aa5 | ||
|
|
e96c1d4b0f | ||
|
|
15cdc6924b | ||
|
|
677e8b122c | ||
|
|
75a7e40a27 | ||
|
|
c8939944c6 | ||
|
|
4e6252fb03 | ||
|
|
8bd8e744f3 | ||
|
|
53f32a7dd7 | ||
|
|
47a7ed93d3 | ||
|
|
2ac826edca | ||
|
|
89aff2081c | ||
|
|
3b773b3416 | ||
|
|
648b2876f6 |
10
.github/workflows/db-change-missing.yml
vendored
10
.github/workflows/db-change-missing.yml
vendored
@@ -4,22 +4,22 @@ on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, labeled, unlabeled]
|
||||
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
check-labels:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Check db change labels
|
||||
id: check_labels
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
URL=/repos/meilisearch/meilisearch/pulls/${{ github.event.pull_request.number }}/labels
|
||||
echo ${{ github.event.pull_request.number }}
|
||||
echo $URL
|
||||
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/meilisearch/meilisearch/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
|
||||
LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
|
||||
echo "Labels: $LABELS"
|
||||
if [[ ! "$LABELS" =~ "db change" && ! "$LABELS" =~ "no db change" ]]; then
|
||||
echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
|
||||
exit 1
|
||||
|
||||
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
|
||||
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
@@ -113,7 +113,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
uses: svenstaro/upload-release-action@2.11.1
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
|
||||
10
.github/workflows/test-suite.yml
vendored
10
.github/workflows/test-suite.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
@@ -155,7 +155,7 @@ jobs:
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.85
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -172,7 +172,7 @@ jobs:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.8
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -18,5 +18,8 @@
|
||||
## ... unreviewed
|
||||
*.snap.new
|
||||
|
||||
# Database snapshot
|
||||
crates/meilisearch/db.snapshot
|
||||
|
||||
# Fuzzcheck data for the facet indexing fuzz test
|
||||
crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
|
||||
|
||||
@@ -57,9 +57,17 @@ This command will be triggered to each PR as a requirement for merging it.
|
||||
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
|
||||
It'll store some built artifacts in the directory of your choice.
|
||||
|
||||
We recommend using the standard `$HOME/.cache/lindera` directory:
|
||||
We recommend using the `$HOME/.cache/meili/lindera` directory:
|
||||
```sh
|
||||
export LINDERA_CACHE=$HOME/.cache/lindera
|
||||
export LINDERA_CACHE=$HOME/.cache/meili/lindera
|
||||
```
|
||||
|
||||
You can set the `MILLI_BENCH_DATASETS_PATH` environment variable to further speed up your builds.
|
||||
It'll store some big files used for the benchmarks in the directory of your choice.
|
||||
|
||||
We recommend using the `$HOME/.cache/meili/benches` directory:
|
||||
```sh
|
||||
export MILLI_BENCH_DATASETS_PATH=$HOME/.cache/meili/benches
|
||||
```
|
||||
|
||||
Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.
|
||||
|
||||
689
Cargo.lock
generated
689
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.15.0"
|
||||
version = "1.16.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.10"
|
||||
roaring = "0.10.12"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bytes = "1.10.1"
|
||||
convert_case = "0.8.0"
|
||||
flate2 = "1.1.2"
|
||||
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::{FilterableAttributesRule, Index};
|
||||
use rand::seq::SliceRandom;
|
||||
use rand_chacha::rand_core::SeedableRng;
|
||||
@@ -65,7 +65,7 @@ fn setup_settings<'t>(
|
||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
}
|
||||
|
||||
fn setup_index_with_settings(
|
||||
@@ -166,9 +166,10 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -232,9 +233,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -276,9 +278,10 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -344,9 +347,10 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -420,9 +424,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -464,9 +469,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -504,9 +510,10 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -571,9 +578,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -637,9 +645,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -703,9 +712,10 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -768,9 +778,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -812,9 +823,10 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -879,9 +891,10 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -955,9 +968,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1000,9 +1014,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1041,9 +1056,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1107,9 +1123,10 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1172,9 +1189,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1216,9 +1234,10 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1283,9 +1302,10 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1331,9 +1351,10 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
||||
new_fields_ids_map,
|
||||
Some(primary_key),
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1395,9 +1416,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1439,9 +1461,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1479,9 +1502,10 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1568,9 +1592,10 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1658,9 +1683,10 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1740,9 +1766,10 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1806,9 +1833,10 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1871,9 +1899,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1915,9 +1944,10 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1982,9 +2012,10 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
|
||||
@@ -90,7 +90,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
|
||||
(conf.configure)(&mut builder);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
@@ -125,9 +125,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
RuntimeEmbedders::default(),
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
|
||||
writeln!(
|
||||
&mut manifest_paths_file,
|
||||
r#"pub const {}: &str = {:?};"#,
|
||||
dataset.to_case(Case::ScreamingSnake),
|
||||
dataset.to_case(Case::UpperSnake),
|
||||
out_file.display(),
|
||||
)?;
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
time = { version = "0.3.41", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
anyhow = "1.0.98"
|
||||
vergen-git2 = "1.0.7"
|
||||
|
||||
@@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
anyhow = "1.0.98"
|
||||
flate2 = "1.1.2"
|
||||
http = "1.3.1"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.20.2"
|
||||
once_cell = "1.21.3"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
#![allow(clippy::type_complexity)]
|
||||
#![allow(clippy::wrong_self_convention)]
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::byte_unit::Byte;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
|
||||
use meilisearch_types::tasks::{
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
|
||||
};
|
||||
use meilisearch_types::InstanceUid;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -141,6 +146,12 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<String, ExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
@@ -213,6 +224,15 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size,
|
||||
indexes: indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| (pattern.to_string(), settings))
|
||||
.collect(),
|
||||
},
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
@@ -329,6 +349,7 @@ pub(crate) mod test {
|
||||
write_channel_congestion: None,
|
||||
internal_database_sizes: Default::default(),
|
||||
},
|
||||
embedder_stats: Default::default(),
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
|
||||
@@ -116,6 +116,15 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chat_completions_settings(
|
||||
&mut self,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => current.chat_completions_settings(),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.features()),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
@@ -21,6 +22,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
@@ -192,6 +194,34 @@ impl V6Reader {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn chat_completions_settings(
|
||||
&mut self,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<(String, ChatCompletionSettings)>> + '_>> {
|
||||
let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) {
|
||||
Ok(entries) => entries,
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
Ok(Box::new(
|
||||
entries
|
||||
.map(|entry| -> Result<Option<_>> {
|
||||
let entry = entry?;
|
||||
let file_name = entry.file_name();
|
||||
let path = Path::new(&file_name);
|
||||
if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json"))
|
||||
{
|
||||
let name = path.file_stem().unwrap().to_str().unwrap().to_string();
|
||||
let file = File::open(entry.path())?;
|
||||
let settings = serde_json::from_reader(file)?;
|
||||
Ok(Some((name, settings)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
.filter_map(|entry| entry.transpose()),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::path::PathBuf;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@@ -51,6 +51,10 @@ impl DumpWriter {
|
||||
KeyWriter::new(self.dir.path().to_path_buf())
|
||||
}
|
||||
|
||||
pub fn create_chat_completions_settings(&self) -> Result<ChatCompletionsSettingsWriter> {
|
||||
ChatCompletionsSettingsWriter::new(self.dir.path().join("chat-completions-settings"))
|
||||
}
|
||||
|
||||
pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
@@ -104,6 +108,24 @@ impl KeyWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ChatCompletionsSettingsWriter {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl ChatCompletionsSettingsWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
Ok(ChatCompletionsSettingsWriter { path })
|
||||
}
|
||||
|
||||
pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> {
|
||||
let mut settings_file = File::create(self.path.join(name).with_extension("json"))?;
|
||||
serde_json::to_writer(&mut settings_file, &settings)?;
|
||||
settings_file.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TaskWriter {
|
||||
queue: BufWriter<File>,
|
||||
update_files: PathBuf,
|
||||
|
||||
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
@@ -14,7 +14,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
nom = "7.1.3"
|
||||
nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
unescaper = "0.1.6"
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
|
||||
@@ -16,7 +16,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -12,11 +12,11 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
either = "1.15.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
|
||||
@@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::vector::RuntimeEmbedders;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
@@ -89,7 +89,7 @@ fn main() {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let embedders = RuntimeEmbedders::default();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
@@ -144,6 +144,7 @@ fn main() {
|
||||
embedders,
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -11,31 +11,31 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
anyhow = "1.0.98"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bumpalo = "3.16.0"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
indexmap = "2.7.0"
|
||||
flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
@@ -43,7 +43,8 @@ time = { version = "0.3.37", features = [
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::io;
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -211,6 +212,23 @@ impl<'a> Dump<'a> {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::Export { url, api_key, payload_size, indexes } => {
|
||||
KindWithContent::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size,
|
||||
indexes: indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| {
|
||||
Ok((
|
||||
IndexUidPattern::try_from(pattern)
|
||||
.map_err(|_| Error::CorruptedDump)?,
|
||||
settings,
|
||||
))
|
||||
})
|
||||
.collect::<Result<_, Error>>()?,
|
||||
}
|
||||
}
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
||||
@@ -151,6 +151,10 @@ pub enum Error {
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
Export(Box<Self>),
|
||||
#[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")]
|
||||
FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String },
|
||||
#[error("Failed to rollback for index `{index}`: {rollback_outcome} ")]
|
||||
RollbackFailed { index: String, rollback_outcome: RollbackOutcome },
|
||||
#[error(transparent)]
|
||||
@@ -212,6 +216,7 @@ impl Error {
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::FromRemoteWhenExporting { .. }
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
@@ -221,6 +226,7 @@ impl Error {
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
| Error::FeatureNotEnabled(_)
|
||||
| Error::Export(_)
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
@@ -282,6 +288,7 @@ impl ErrorCode for Error {
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::FromRemoteWhenExporting { .. } => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
@@ -294,6 +301,7 @@ impl ErrorCode for Error {
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::Export(_) => Code::Internal,
|
||||
Error::RollbackFailed { .. } => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::IndexSchedulerVersionMismatch { .. } => Code::Internal,
|
||||
|
||||
@@ -144,6 +144,19 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.multimodal {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "multimodal",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/846",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
|
||||
@@ -289,6 +289,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::Export { url, api_key, payload_size, indexes } => {
|
||||
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
@@ -343,6 +346,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
uid,
|
||||
details,
|
||||
stats,
|
||||
embedder_stats,
|
||||
started_at,
|
||||
finished_at,
|
||||
progress: _,
|
||||
@@ -366,6 +370,12 @@ pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
if !embedder_stats.skip_serializing() {
|
||||
snap.push_str(&format!(
|
||||
"embedder stats: {}, ",
|
||||
serde_json::to_string(&embedder_stats).unwrap()
|
||||
));
|
||||
}
|
||||
snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap()));
|
||||
snap.push('}');
|
||||
snap
|
||||
|
||||
@@ -55,14 +55,17 @@ use meilisearch_types::features::{
|
||||
ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures,
|
||||
};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||
use meilisearch_types::milli::vector::json_template::JsonTemplate;
|
||||
use meilisearch_types::milli::vector::{
|
||||
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Index};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{KindWithContent, Task};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use processing::ProcessingTasks;
|
||||
pub use queue::Query;
|
||||
use queue::Queue;
|
||||
@@ -77,6 +80,7 @@ use crate::utils::clamp_to_page_size;
|
||||
pub(crate) type BEI128 = I128<BE>;
|
||||
|
||||
const TASK_SCHEDULER_SIZE_THRESHOLD_PERCENT_INT: u64 = 40;
|
||||
const CHAT_SETTINGS_DB_NAME: &str = "chat-settings";
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexSchedulerOptions {
|
||||
@@ -279,7 +283,7 @@ impl IndexScheduler {
|
||||
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
|
||||
let queue = Queue::new(&env, &mut wtxn, &options)?;
|
||||
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
|
||||
let chat_settings = env.create_database(&mut wtxn, Some("chat-settings"))?;
|
||||
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
@@ -310,11 +314,7 @@ impl IndexScheduler {
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
pub fn write_txn(&self) -> Result<RwTxn> {
|
||||
self.env.write_txn().map_err(|e| e.into())
|
||||
}
|
||||
|
||||
pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
|
||||
fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
|
||||
self.env.read_txn().map_err(|e| e.into())
|
||||
}
|
||||
|
||||
@@ -556,7 +556,6 @@ impl IndexScheduler {
|
||||
/// And a `Vec` of the workspace_uids
|
||||
pub fn paginated_chat_workspace_uids(
|
||||
&self,
|
||||
_filters: &meilisearch_auth::AuthFilter,
|
||||
from: usize,
|
||||
limit: usize,
|
||||
) -> Result<(usize, Vec<String>)> {
|
||||
@@ -855,29 +854,42 @@ impl IndexScheduler {
|
||||
&self,
|
||||
index_uid: String,
|
||||
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
) -> Result<RuntimeEmbedders> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
|
||||
..
|
||||
}| {
|
||||
let prompt = Arc::new(
|
||||
prompt
|
||||
.try_into()
|
||||
.map_err(meilisearch_types::milli::Error::from)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
fragments,
|
||||
}|
|
||||
-> Result<(String, Arc<RuntimeEmbedder>)> {
|
||||
let document_template = prompt
|
||||
.try_into()
|
||||
.map_err(meilisearch_types::milli::Error::from)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
let fragments = fragments
|
||||
.into_inner()
|
||||
.into_iter()
|
||||
.map(|fragment| {
|
||||
let value = embedder_options.fragment(&fragment.name).unwrap();
|
||||
let template = JsonTemplate::new(value.clone()).unwrap();
|
||||
RuntimeFragment { name: fragment.name, id: fragment.id, template }
|
||||
})
|
||||
.collect();
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((
|
||||
name,
|
||||
(embedder.clone(), prompt, quantized.unwrap_or_default()),
|
||||
let runtime = Arc::new(RuntimeEmbedder::new(
|
||||
embedder.clone(),
|
||||
document_template,
|
||||
fragments,
|
||||
quantized.unwrap_or_default(),
|
||||
));
|
||||
|
||||
return Ok((name, runtime));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -893,28 +905,44 @@ impl IndexScheduler {
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||
|
||||
let runtime = Arc::new(RuntimeEmbedder::new(
|
||||
embedder.clone(),
|
||||
document_template,
|
||||
fragments,
|
||||
quantized.unwrap_or_default(),
|
||||
));
|
||||
|
||||
Ok((name, runtime))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
res.map(RuntimeEmbedders::new)
|
||||
}
|
||||
|
||||
pub fn chat_settings(&self, rtxn: &RoTxn, uid: &str) -> Result<Option<ChatCompletionSettings>> {
|
||||
self.chat_settings.get(rtxn, uid).map_err(Into::into)
|
||||
pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.chat_settings.get(&rtxn, uid).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn put_chat_settings(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
uid: &str,
|
||||
settings: &ChatCompletionSettings,
|
||||
) -> Result<()> {
|
||||
self.chat_settings.put(wtxn, uid, settings).map_err(Into::into)
|
||||
/// Return true if chat workspace exists.
|
||||
pub fn chat_workspace_exists(&self, name: &str) -> Result<bool> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
Ok(self.chat_settings.remap_data_type::<DecodeIgnore>().get(&rtxn, name)?.is_some())
|
||||
}
|
||||
|
||||
pub fn delete_chat_settings(&self, wtxn: &mut RwTxn, uid: &str) -> Result<bool> {
|
||||
self.chat_settings.delete(wtxn, uid).map_err(Into::into)
|
||||
pub fn put_chat_settings(&self, uid: &str, settings: &ChatCompletionSettings) -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.chat_settings.put(&mut wtxn, uid, settings)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn delete_chat_settings(&self, uid: &str) -> Result<bool> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let deleted = self.chat_settings.delete(&mut wtxn, uid)?;
|
||||
wtxn.commit()?;
|
||||
Ok(deleted)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -103,6 +103,7 @@ make_enum_progress! {
|
||||
pub enum DumpCreationProgress {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheChatCompletionSettings,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
@@ -175,8 +176,17 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum Export {
|
||||
EnsuringCorrectnessOfTheTarget,
|
||||
ExportingTheSettings,
|
||||
ExportingTheDocuments,
|
||||
}
|
||||
}
|
||||
|
||||
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Index alias AtomicIndexStep => "index" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
|
||||
@@ -179,6 +179,7 @@ impl BatchQueue {
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
embedder_stats: batch.embedder_stats.as_ref().into(),
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
|
||||
@@ -71,6 +71,7 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::fmt;
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
@@ -47,6 +48,9 @@ pub(crate) enum Batch {
|
||||
IndexSwap {
|
||||
task: Task,
|
||||
},
|
||||
Export {
|
||||
task: Task,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
@@ -103,6 +107,7 @@ impl Batch {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::Export { task }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
@@ -142,6 +147,7 @@ impl Batch {
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
@@ -167,6 +173,7 @@ impl fmt::Display for Batch {
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::Export { .. } => f.write_str("Export")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
match index_uid {
|
||||
@@ -426,9 +433,10 @@ impl IndexScheduler {
|
||||
/// 0. We get the *last* task to cancel.
|
||||
/// 1. We get the tasks to upgrade.
|
||||
/// 2. We get the *next* task to delete.
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
/// 3. We get the *next* export to process.
|
||||
/// 4. We get the *next* snapshot to process.
|
||||
/// 5. We get the *next* dump to process.
|
||||
/// 6. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
@@ -500,7 +508,17 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
// 3. we batch the export.
|
||||
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
|
||||
if !to_export.is_empty() {
|
||||
let task_id = to_export.iter().next().expect("There must be at least one export task");
|
||||
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
|
||||
current_batch.processing([&mut task]);
|
||||
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
|
||||
return Ok(Some((Batch::Export { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
@@ -510,7 +528,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the dumps.
|
||||
// 5. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
@@ -523,7 +541,7 @@ impl IndexScheduler {
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
@@ -577,7 +595,11 @@ impl IndexScheduler {
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = self.queue.file_store.compute_size(uuid)?;
|
||||
let content_size = match self.queue.file_store.compute_size(uuid) {
|
||||
Ok(content_size) => content_size,
|
||||
Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
|
||||
Err(otherwise) => return Err(otherwise.into()),
|
||||
};
|
||||
total_size = total_size.saturating_add(content_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ mod autobatcher_test;
|
||||
mod create_batch;
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_export;
|
||||
mod process_index_operation;
|
||||
mod process_snapshot_creation;
|
||||
mod process_upgrade;
|
||||
@@ -237,7 +238,7 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
|
||||
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u64);
|
||||
progress.update_progress(task_progress_obj);
|
||||
process_batch_info = info;
|
||||
let mut success = 0;
|
||||
@@ -316,7 +317,7 @@ impl IndexScheduler {
|
||||
Err(err) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u64);
|
||||
progress.update_progress(task_progress_obj);
|
||||
|
||||
if matches!(err, Error::DatabaseUpgrade(_)) {
|
||||
|
||||
@@ -162,8 +162,13 @@ impl IndexScheduler {
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
|
||||
let (tasks, congestion) =
|
||||
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
|
||||
let (tasks, congestion) = self.apply_index_operation(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
op,
|
||||
&progress,
|
||||
current_batch.embedder_stats.clone(),
|
||||
)?;
|
||||
|
||||
{
|
||||
progress.update_progress(FinalizingIndexStep::Committing);
|
||||
@@ -238,10 +243,12 @@ impl IndexScheduler {
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
&|| must_stop_processing.get(),
|
||||
&progress,
|
||||
current_batch.embedder_stats.clone(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
@@ -346,8 +353,8 @@ impl IndexScheduler {
|
||||
for (step, swap) in swaps.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
|
||||
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
|
||||
step as u32,
|
||||
swaps.len() as u32,
|
||||
step as u64,
|
||||
swaps.len() as u64,
|
||||
));
|
||||
self.apply_index_swap(
|
||||
&mut wtxn,
|
||||
@@ -361,6 +368,46 @@ impl IndexScheduler {
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::Export { mut task } => {
|
||||
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| {
|
||||
self.process_export(
|
||||
url,
|
||||
api_key.as_deref(),
|
||||
payload_size.as_ref(),
|
||||
indexes,
|
||||
progress,
|
||||
)
|
||||
}));
|
||||
|
||||
let stats = match ret {
|
||||
Ok(Ok(stats)) => stats,
|
||||
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
|
||||
Ok(Err(e)) => return Err(Error::Export(Box::new(e))),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
if let Some(Details::Export { indexes, .. }) = task.details.as_mut() {
|
||||
*indexes = stats;
|
||||
}
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
unreachable!();
|
||||
@@ -425,7 +472,7 @@ impl IndexScheduler {
|
||||
// 3. before_name -> new_name in the task's KindWithContent
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
||||
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u64);
|
||||
progress.update_progress(task_progress);
|
||||
|
||||
for task_id in tasks_to_update {
|
||||
@@ -482,7 +529,7 @@ impl IndexScheduler {
|
||||
// The tasks that have been removed *per batches*.
|
||||
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
||||
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u64);
|
||||
progress.update_progress(task_progress);
|
||||
for task_id in to_delete_tasks.iter() {
|
||||
let task =
|
||||
@@ -528,7 +575,7 @@ impl IndexScheduler {
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(
|
||||
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
|
||||
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u64,
|
||||
);
|
||||
progress.update_progress(task_progress);
|
||||
for index in affected_indexes.iter() {
|
||||
@@ -547,7 +594,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasks);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u64);
|
||||
progress.update_progress(task_progress);
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
|
||||
@@ -564,7 +611,7 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
progress.update_progress(TaskDeletionProgress::DeletingBatches);
|
||||
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
|
||||
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u64);
|
||||
progress.update_progress(batch_progress);
|
||||
for (batch_id, to_delete_tasks) in affected_batches {
|
||||
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
||||
@@ -708,9 +755,11 @@ impl IndexScheduler {
|
||||
from.1,
|
||||
from.2
|
||||
);
|
||||
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
let ret = catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
self.process_rollback(from, progress)
|
||||
})) {
|
||||
}));
|
||||
|
||||
match ret {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))),
|
||||
Err(e) => {
|
||||
@@ -737,7 +786,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
// 3. We now have a list of tasks to cancel, cancel them
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u64);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(
|
||||
@@ -748,7 +797,7 @@ impl IndexScheduler {
|
||||
)?;
|
||||
|
||||
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u64);
|
||||
progress.update_progress(progress_obj);
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Canceled;
|
||||
|
||||
@@ -43,12 +43,21 @@ impl IndexScheduler {
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2. dump the tasks
|
||||
// 2. dump the chat completion settings
|
||||
// TODO should I skip the export if the chat completion has been disabled?
|
||||
progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings);
|
||||
let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?;
|
||||
for result in self.chat_settings.iter(&rtxn)? {
|
||||
let (name, chat_settings) = result?;
|
||||
dump_chat_completion_settings.push_settings(name, &chat_settings)?;
|
||||
}
|
||||
|
||||
// 3. dump the tasks
|
||||
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
|
||||
let (atomic, update_task_progress) =
|
||||
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
|
||||
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u64);
|
||||
progress.update_progress(update_task_progress);
|
||||
|
||||
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
|
||||
@@ -81,7 +90,7 @@ impl IndexScheduler {
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
@@ -105,12 +114,12 @@ impl IndexScheduler {
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. dump the batches
|
||||
// 4. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
let (atomic_batch_progress, update_batch_progress) =
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u64);
|
||||
progress.update_progress(update_batch_progress);
|
||||
|
||||
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
|
||||
@@ -138,9 +147,9 @@ impl IndexScheduler {
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
// 5. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u64;
|
||||
let mut count = 0;
|
||||
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
|
||||
@@ -165,20 +174,17 @@ impl IndexScheduler {
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let nb_documents = index
|
||||
.number_of_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
as u32;
|
||||
as u64;
|
||||
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
|
||||
progress.update_progress(update_document_progress);
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 4.1. Dump the documents
|
||||
// 5.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
@@ -221,16 +227,12 @@ impl IndexScheduler {
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
regenerate,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
@@ -240,7 +242,7 @@ impl IndexScheduler {
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
// 5.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
@@ -251,7 +253,7 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 5. Dump experimental feature settings
|
||||
// 6. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
|
||||
365
crates/index-scheduler/src/scheduler/process_export.rs
Normal file
365
crates/index-scheduler/src/scheduler/process_export.rs
Normal file
@@ -0,0 +1,365 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{self, Write as _};
|
||||
use std::sync::atomic;
|
||||
use std::time::Duration;
|
||||
|
||||
use backoff::ExponentialBackoff;
|
||||
use byte_unit::Byte;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::update::{request_threads, Setting};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
|
||||
use meilisearch_types::settings::{self, SecretPolicy};
|
||||
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
|
||||
use serde::Deserialize;
|
||||
use ureq::{json, Response};
|
||||
|
||||
use super::MustStopProcessing;
|
||||
use crate::processing::AtomicDocumentStep;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_export(
|
||||
&self,
|
||||
base_url: &str,
|
||||
api_key: Option<&str>,
|
||||
payload_size: Option<&Byte>,
|
||||
indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
|
||||
progress: Progress,
|
||||
) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?;
|
||||
|
||||
let indexes: Vec<_> = self
|
||||
.index_names()?
|
||||
.into_iter()
|
||||
.flat_map(|uid| {
|
||||
indexes
|
||||
.iter()
|
||||
.find(|(pattern, _)| pattern.matches_str(&uid))
|
||||
.map(|(pattern, settings)| (pattern, uid, settings))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut output = BTreeMap::new();
|
||||
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
|
||||
if must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
progress.update_progress(VariableNameStep::<ExportIndex>::new(
|
||||
format!("Exporting index `{uid}`"),
|
||||
i as u64,
|
||||
indexes.len() as u64,
|
||||
));
|
||||
|
||||
let ExportIndexSettings { filter, override_settings } = export_settings;
|
||||
let index = self.index(uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
|
||||
// First, check if the index already exists
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
let response = retry(&must_stop_processing, || {
|
||||
let mut request = agent.get(&url);
|
||||
if let Some(api_key) = api_key {
|
||||
request = request.set("Authorization", &format!("Bearer {api_key}"));
|
||||
}
|
||||
|
||||
request.send_bytes(Default::default()).map_err(into_backoff_error)
|
||||
});
|
||||
let index_exists = match response {
|
||||
Ok(response) => response.status() == 200,
|
||||
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
|
||||
false
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
let primary_key = index
|
||||
.primary_key(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Create the index
|
||||
if !index_exists {
|
||||
let url = format!("{base_url}/indexes");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&url);
|
||||
if let Some(api_key) = api_key {
|
||||
request = request.set("Authorization", &format!("Bearer {api_key}"));
|
||||
}
|
||||
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Patch the index primary key
|
||||
if index_exists && *override_settings {
|
||||
let url = format!("{base_url}/indexes/{uid}");
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(api_key) = api_key {
|
||||
request = request.set("Authorization", &format!("Bearer {api_key}"));
|
||||
}
|
||||
let index_param = json!({ "primaryKey": primary_key });
|
||||
request.send_json(&index_param).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Send the index settings
|
||||
if !index_exists || *override_settings {
|
||||
let mut settings =
|
||||
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// Remove the experimental chat setting if not enabled
|
||||
if self.features().check_chat_completions("exporting chat settings").is_err() {
|
||||
settings.chat = Setting::NotSet;
|
||||
}
|
||||
// Retry logic for sending settings
|
||||
let url = format!("{base_url}/indexes/{uid}/settings");
|
||||
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.patch(&url);
|
||||
if let Some(bearer) = bearer.as_ref() {
|
||||
request = request.set("Authorization", bearer);
|
||||
}
|
||||
request.send_json(settings.clone()).map_err(into_backoff_error)
|
||||
})?;
|
||||
}
|
||||
|
||||
let filter = filter
|
||||
.as_ref()
|
||||
.map(Filter::from_json)
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
.flatten();
|
||||
|
||||
let filter_universe = filter
|
||||
.map(|f| f.evaluate(&index_rtxn, &index))
|
||||
.transpose()
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
let whole_universe = index
|
||||
.documents_ids(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
let universe = filter_universe.unwrap_or(whole_universe);
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// We don't need to keep this one alive as we will
|
||||
// spawn many threads to process the documents
|
||||
drop(index_rtxn);
|
||||
|
||||
let total_documents = universe.len();
|
||||
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||
progress.update_progress(progress_step);
|
||||
|
||||
output.insert(
|
||||
IndexUidPattern::new_unchecked(uid.clone()),
|
||||
DetailsExportIndexSettings {
|
||||
settings: (*export_settings).clone(),
|
||||
matched_documents: Some(total_documents),
|
||||
},
|
||||
);
|
||||
|
||||
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB
|
||||
let documents_url = format!("{base_url}/indexes/{uid}/documents");
|
||||
|
||||
request_threads()
|
||||
.broadcast(|ctx| {
|
||||
let index_rtxn = index
|
||||
.read_txn()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let mut tmp_buffer = Vec::new();
|
||||
let mut compressed_buffer = Vec::new();
|
||||
for (i, docid) in universe.iter().enumerate() {
|
||||
if i % ctx.num_threads() != ctx.index() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let document = index
|
||||
.document(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
// TODO definitely factorize this code
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&index_rtxn, docid)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(Error::from_milli(
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(
|
||||
&index_rtxn,
|
||||
std::iter::once(docid),
|
||||
)
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={docid}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
),
|
||||
Some(uid.to_string()),
|
||||
));
|
||||
};
|
||||
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tmp_buffer.clear();
|
||||
serde_json::to_writer(&mut tmp_buffer, &document)
|
||||
.map_err(milli::InternalError::from)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
|
||||
|
||||
// Make sure we put at least one document in the buffer even
|
||||
// though we might go above the buffer limit before sending
|
||||
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
|
||||
// We compress the documents before sending them
|
||||
let mut encoder =
|
||||
GzEncoder::new(&mut compressed_buffer, Compression::default());
|
||||
encoder
|
||||
.write_all(&buffer)
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
encoder
|
||||
.finish()
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
request = request.set("Content-Encoding", "gzip");
|
||||
if let Some(api_key) = api_key {
|
||||
request = request
|
||||
.set("Authorization", &(format!("Bearer {api_key}")));
|
||||
}
|
||||
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
buffer.clear();
|
||||
compressed_buffer.clear();
|
||||
}
|
||||
buffer.extend_from_slice(&tmp_buffer);
|
||||
|
||||
if i % 100 == 0 {
|
||||
step.fetch_add(100, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
retry(&must_stop_processing, || {
|
||||
let mut request = agent.post(&documents_url);
|
||||
request = request.set("Content-Type", "application/x-ndjson");
|
||||
if let Some(api_key) = api_key {
|
||||
request = request.set("Authorization", &(format!("Bearer {api_key}")));
|
||||
}
|
||||
request.send_bytes(&buffer).map_err(into_backoff_error)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| {
|
||||
Error::from_milli(
|
||||
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
|
||||
Some(uid.to_string()),
|
||||
)
|
||||
})?;
|
||||
|
||||
step.store(total_documents, atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
|
||||
where
|
||||
F: Fn() -> Result<ureq::Response, backoff::Error<ureq::Error>>,
|
||||
{
|
||||
match backoff::retry(ExponentialBackoff::default(), || {
|
||||
if must_stop_processing.get() {
|
||||
return Err(backoff::Error::Permanent(ureq::Error::Status(
|
||||
u16::MAX,
|
||||
// 444: Connection Closed Without Response
|
||||
Response::new(444, "Abort", "Aborted task").unwrap(),
|
||||
)));
|
||||
}
|
||||
send_request()
|
||||
}) {
|
||||
Ok(response) => Ok(response),
|
||||
Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)),
|
||||
Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)),
|
||||
}
|
||||
}
|
||||
|
||||
fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> {
|
||||
match err {
|
||||
// Those code status must trigger an automatic retry
|
||||
// <https://www.restapitutorial.com/advanced/responses/retries>
|
||||
ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => {
|
||||
backoff::Error::Transient { err, retry_after: None }
|
||||
}
|
||||
ureq::Error::Status(_, _) => backoff::Error::Permanent(err),
|
||||
ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None },
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a `ureq::Error` into an `Error`.
|
||||
fn ureq_error_into_error(error: ureq::Error) -> Error {
|
||||
#[derive(Deserialize)]
|
||||
struct MeiliError {
|
||||
message: String,
|
||||
code: String,
|
||||
r#type: String,
|
||||
link: String,
|
||||
}
|
||||
|
||||
match error {
|
||||
// This is a workaround to handle task abortion - the error propagation path
|
||||
// makes it difficult to cleanly surface the abortion at this level.
|
||||
ureq::Error::Status(u16::MAX, _) => Error::AbortedTask,
|
||||
ureq::Error::Status(_, response) => match response.into_json() {
|
||||
Ok(MeiliError { message, code, r#type, link }) => {
|
||||
Error::FromRemoteWhenExporting { message, code, r#type, link }
|
||||
}
|
||||
Err(e) => e.into(),
|
||||
},
|
||||
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
|
||||
}
|
||||
}
|
||||
|
||||
enum ExportIndex {}
|
||||
@@ -1,11 +1,13 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli::documents::PrimaryKey;
|
||||
use meilisearch_types::milli::progress::Progress;
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use meilisearch_types::Index;
|
||||
@@ -24,7 +26,7 @@ impl IndexScheduler {
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index, progress),
|
||||
skip(self, index_wtxn, index, progress, embedder_stats),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
pub(crate) fn apply_index_operation<'i>(
|
||||
@@ -33,6 +35,7 @@ impl IndexScheduler {
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
progress: &Progress,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||
let indexer_alloc = Bump::new();
|
||||
let started_processing_at = std::time::Instant::now();
|
||||
@@ -86,8 +89,9 @@ impl IndexScheduler {
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
.map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
@@ -113,18 +117,8 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
|
||||
let (document_changes, operation_stats, primary_key) = indexer
|
||||
@@ -187,6 +181,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
|
||||
);
|
||||
@@ -266,18 +261,8 @@ impl IndexScheduler {
|
||||
|
||||
let mut congestion = None;
|
||||
if task.error.is_none() {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
let candidates_count = candidates.len();
|
||||
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
|
||||
@@ -290,8 +275,9 @@ impl IndexScheduler {
|
||||
})
|
||||
.unwrap()?;
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentEditionProgress::Indexing);
|
||||
@@ -308,6 +294,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
@@ -429,18 +416,8 @@ impl IndexScheduler {
|
||||
|
||||
let mut congestion = None;
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let local_pool;
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|i| format!("indexing-thread-{i}"))
|
||||
.build()
|
||||
.unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
@@ -448,8 +425,9 @@ impl IndexScheduler {
|
||||
indexer.delete_documents_by_docids(to_delete);
|
||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||
let embedders = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
progress.update_progress(DocumentDeletionProgress::Indexing);
|
||||
@@ -466,6 +444,7 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
progress,
|
||||
&embedder_stats,
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
@@ -498,14 +477,11 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
progress.update_progress(SettingsProgress::ApplyTheSettings);
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
let congestion = builder
|
||||
.execute(&|| must_stop_processing.get(), progress, embedder_stats)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
Ok((tasks, None))
|
||||
Ok((tasks, congestion))
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
@@ -521,6 +497,7 @@ impl IndexScheduler {
|
||||
tasks: cleared_tasks,
|
||||
},
|
||||
progress,
|
||||
embedder_stats.clone(),
|
||||
)?;
|
||||
|
||||
let (settings_tasks, _congestion) = self.apply_index_operation(
|
||||
@@ -528,6 +505,7 @@ impl IndexScheduler {
|
||||
index,
|
||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||
progress,
|
||||
embedder_stats,
|
||||
)?;
|
||||
|
||||
let mut tasks = settings_tasks;
|
||||
|
||||
@@ -58,7 +58,7 @@ impl IndexScheduler {
|
||||
// 2.4 Only copy the update files of the enqueued tasks
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||
let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u64);
|
||||
progress.update_progress(update_file_progress);
|
||||
for task_id in enqueued {
|
||||
let task =
|
||||
@@ -74,12 +74,12 @@ impl IndexScheduler {
|
||||
// 3. Snapshot every indexes
|
||||
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||
let index_mapping = self.index_mapper.index_mapping;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||
let nb_indexes = index_mapping.len(&rtxn)? as u64;
|
||||
|
||||
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
|
||||
let (name, uuid) = result?;
|
||||
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
|
||||
name, i as u32, nb_indexes,
|
||||
name, i as u64, nb_indexes,
|
||||
));
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
|
||||
@@ -22,8 +22,8 @@ impl IndexScheduler {
|
||||
}
|
||||
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
|
||||
format!("Upgrading index `{uid}`"),
|
||||
i as u32,
|
||||
indexes.len() as u32,
|
||||
i as u64,
|
||||
indexes.len() as u64,
|
||||
));
|
||||
let index = self.index(uid)?;
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
@@ -65,8 +65,8 @@ impl IndexScheduler {
|
||||
for (i, uid) in indexes.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
|
||||
format!("Rollbacking index `{uid}`"),
|
||||
i as u32,
|
||||
indexes.len() as u32,
|
||||
i as u64,
|
||||
indexes.len() as u64,
|
||||
));
|
||||
let index_schd_rtxn = self.env.read_txn()?;
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test.rs
|
||||
expression: config.embedder_options
|
||||
---
|
||||
{
|
||||
"Rest": {
|
||||
"api_key": "My super secret",
|
||||
"distribution": null,
|
||||
"dimensions": 4,
|
||||
"url": "http://localhost:7777",
|
||||
"request": "{{text}}",
|
||||
"search_fragments": {},
|
||||
"indexing_fragments": {},
|
||||
"response": "{{embedding}}",
|
||||
"headers": {}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: simple_hf_config.embedder_options
|
||||
---
|
||||
{
|
||||
"HuggingFace": {
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"distribution": null,
|
||||
"pooling": "useModel"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
---
|
||||
{
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
||||
0.3
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: doc
|
||||
---
|
||||
{
|
||||
"doggo": "kefir",
|
||||
"breed": "patou",
|
||||
"_vectors": {
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
||||
0.3
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,17 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
expression: simple_hf_config.embedder_options
|
||||
expression: fakerest_config.embedder_options
|
||||
---
|
||||
{
|
||||
"HuggingFace": {
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
"Rest": {
|
||||
"api_key": "My super secret",
|
||||
"distribution": null,
|
||||
"pooling": "useModel"
|
||||
"dimensions": 384,
|
||||
"url": "http://localhost:7777",
|
||||
"request": "{{text}}",
|
||||
"search_fragments": {},
|
||||
"indexing_fragments": {},
|
||||
"response": "{{embedding}}",
|
||||
"headers": {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::collections::BTreeMap;
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::settings::SettingEmbeddingSettings;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
@@ -690,11 +690,20 @@ fn test_settings_update() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"default");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_json_snapshot!(config.embedder_options);
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -732,6 +741,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -765,6 +775,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -805,6 +816,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -846,6 +858,7 @@ fn basic_get_stats() {
|
||||
"documentDeletion": 0,
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
|
||||
@@ -3,13 +3,14 @@ use std::collections::BTreeMap;
|
||||
use big_s::S;
|
||||
use insta::assert_json_snapshot;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||
use meilisearch_types::milli::vector::SearchQuery;
|
||||
use meilisearch_types::milli::{self, obkv_to_json};
|
||||
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use milli::update::IndexDocumentsMethod::*;
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::read_json;
|
||||
@@ -85,28 +86,51 @@ fn import_vectors() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
|
||||
let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
|
||||
configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||
let fakerest_name = name.clone();
|
||||
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
|
||||
let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
|
||||
configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
|
||||
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
|
||||
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
|
||||
let hf_runtime = configs.get(&simple_hf_name).unwrap();
|
||||
let hf_embedder = &hf_runtime.embedder;
|
||||
let beagle_embed = hf_embedder
|
||||
.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
|
||||
.unwrap();
|
||||
let lab_embed =
|
||||
hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
|
||||
let patou_embed = hf_embedder
|
||||
.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
|
||||
.unwrap();
|
||||
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
|
||||
};
|
||||
|
||||
@@ -166,22 +190,38 @@ fn import_vectors() {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let embedders = index.embedding_configs();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@@ -239,25 +279,41 @@ fn import_vectors() {
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let embedders = index.embedding_configs();
|
||||
// Ensure the document have been inserted into the relevant bitamp
|
||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let configs = embedders.embedding_configs(&rtxn).unwrap();
|
||||
// for consistency with the below
|
||||
#[allow(clippy::get_first)]
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
|
||||
configs.get(0).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
|
||||
insta::assert_snapshot!(name, @"A_fakerest");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
|
||||
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
|
||||
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"1");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
|
||||
insta::assert_snapshot!(name, @"B_small_hf");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
||||
insta::assert_debug_snapshot!(fragments, @r###"
|
||||
FragmentConfigs(
|
||||
[],
|
||||
)
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
// automatically changed to patou because set to regenerate
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
|
||||
// remained beagle
|
||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@@ -399,8 +455,8 @@ fn import_vectors_first_and_embedder_later() {
|
||||
.collect::<Vec<_>>();
|
||||
// the all the vectors linked to the new specified embedder have been removed
|
||||
// Only the unknown embedders stays in the document DB
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
// even though we specified the vector for the ID 3, it shouldn't be marked
|
||||
// as user provided since we explicitely marked it as NOT user provided.
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
@@ -426,19 +482,28 @@ fn import_vectors_first_and_embedder_later() {
|
||||
},
|
||||
quantized: None,
|
||||
},
|
||||
user_provided: RoaringBitmap<[1, 2]>,
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let info =
|
||||
index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
|
||||
insta::assert_snapshot!(info.embedder_id, @"0");
|
||||
|
||||
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
|
||||
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");
|
||||
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embeddings = &embeddings["my_doggo_embedder"];
|
||||
let (embeddings, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
@@ -493,7 +558,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
|
||||
@@ -501,7 +566,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
// the document with the id 4 should generate an embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["my_doggo_embedder"];
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
}
|
||||
@@ -603,33 +668,35 @@ fn delete_document_containing_vector() {
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
quantized: None,
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
quantized: None,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let embedding = &embeddings["manual"];
|
||||
let (embedding, _) = &embeddings["manual"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
index_scheduler
|
||||
@@ -647,30 +714,32 @@ fn delete_document_containing_vector() {
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
[
|
||||
IndexEmbeddingConfig {
|
||||
name: "manual",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: UserProvided(
|
||||
EmbedderOptions {
|
||||
dimensions: 3,
|
||||
distribution: None,
|
||||
},
|
||||
quantized: None,
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
max_bytes: Some(
|
||||
400,
|
||||
),
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
quantized: None,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
fragments: FragmentConfigs(
|
||||
[],
|
||||
),
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -800,7 +869,7 @@ fn delete_embedder_with_user_provided_vectors() {
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -835,6 +904,6 @@ fn delete_embedder_with_user_provided_vectors() {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// FIXME: redaction
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
|
||||
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ pub(crate) enum FailureLocation {
|
||||
InsideCreateBatch,
|
||||
InsideProcessBatch,
|
||||
PanicInsideProcessBatch,
|
||||
ProcessExport,
|
||||
ProcessUpgrade,
|
||||
AcquiringWtxn,
|
||||
UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! Utility functions on the DBs. Mainly getter and setters.
|
||||
|
||||
use crate::milli::progress::EmbedderStats;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
@@ -27,6 +29,7 @@ pub struct ProcessingBatch {
|
||||
pub uid: BatchId,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
pub embedder_stats: Arc<EmbedderStats>,
|
||||
|
||||
pub statuses: HashSet<Status>,
|
||||
pub kinds: HashSet<Kind>,
|
||||
@@ -48,6 +51,7 @@ impl ProcessingBatch {
|
||||
uid,
|
||||
details: DetailsView::default(),
|
||||
stats: BatchStats::default(),
|
||||
embedder_stats: Default::default(),
|
||||
|
||||
statuses,
|
||||
kinds: HashSet::default(),
|
||||
@@ -146,6 +150,7 @@ impl ProcessingBatch {
|
||||
progress: None,
|
||||
details: self.details.clone(),
|
||||
stats: self.stats.clone(),
|
||||
embedder_stats: self.embedder_stats.as_ref().into(),
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
enqueued_at: self.enqueued_at,
|
||||
@@ -273,6 +278,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
K::TaskCancelation { .. }
|
||||
| K::TaskDeletion { .. }
|
||||
| K::DumpCreation { .. }
|
||||
| K::Export { .. }
|
||||
| K::UpgradeDatabase { .. }
|
||||
| K::SnapshotCreation => (),
|
||||
};
|
||||
@@ -600,6 +606,9 @@ impl crate::IndexScheduler {
|
||||
Details::Dump { dump_uid: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::DumpCreation);
|
||||
}
|
||||
Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::Export);
|
||||
}
|
||||
Details::UpgradeDatabase { from: _, to: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.5.1"
|
||||
criterion = "0.6.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
|
||||
@@ -14,4 +14,6 @@ license.workspace = true
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
once_cell = "1.20"
|
||||
once_cell = "1.21"
|
||||
regex-lite = "0.1.6"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
|
||||
@@ -4,9 +4,16 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub use insta;
|
||||
use insta::internals::{Content, ContentPath};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex_lite::Regex;
|
||||
|
||||
static SNAPSHOT_NAMES: Lazy<Mutex<HashMap<PathBuf, usize>>> = Lazy::new(Mutex::default);
|
||||
/// A regex to match UUIDs in messages, specifically looking for the UUID v4 format
|
||||
static UUID_IN_MESSAGE_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// Return the md5 hash of the given string
|
||||
pub fn hash_snapshot(snap: &str) -> String {
|
||||
@@ -26,6 +33,39 @@ pub fn default_snapshot_settings_for_test<'a>(
|
||||
let filename = path.file_name().unwrap().to_str().unwrap();
|
||||
settings.set_omit_expression(true);
|
||||
|
||||
fn uuid_in_message_redaction(content: Content, _content_path: ContentPath) -> Content {
|
||||
match &content {
|
||||
Content::String(s) => {
|
||||
let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]");
|
||||
Content::String(uuid_replaced.to_string())
|
||||
}
|
||||
_ => content,
|
||||
}
|
||||
}
|
||||
|
||||
fn uuid_in_json_key_redaction(content: Content, _content_path: ContentPath) -> Content {
|
||||
match content {
|
||||
Content::Map(map) => {
|
||||
let new_map = map
|
||||
.iter()
|
||||
.map(|(key, value)| match key {
|
||||
Content::String(s) => {
|
||||
let uuid_replaced = UUID_IN_MESSAGE_RE.replace_all(s, "[uuid]");
|
||||
(Content::String(uuid_replaced.to_string()), value.clone())
|
||||
}
|
||||
_ => (key.clone(), value.clone()),
|
||||
})
|
||||
.collect();
|
||||
Content::Map(new_map)
|
||||
}
|
||||
_ => content,
|
||||
}
|
||||
}
|
||||
|
||||
settings.add_dynamic_redaction(".**.message", uuid_in_message_redaction);
|
||||
settings.add_dynamic_redaction(".**.indexUid", uuid_in_message_redaction);
|
||||
settings.add_dynamic_redaction(".**.facetsByIndex", uuid_in_json_key_redaction);
|
||||
|
||||
let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name);
|
||||
let test_name = test_name.rsplit("::").next().unwrap().to_owned();
|
||||
|
||||
@@ -232,6 +272,9 @@ macro_rules! json_string {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate as meili_snap;
|
||||
use crate::UUID_IN_MESSAGE_RE;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[test]
|
||||
fn snap() {
|
||||
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
|
||||
@@ -279,4 +322,14 @@ mod tests {
|
||||
// snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uuid_in_message_regex() {
|
||||
let uuid1 = Uuid::new_v4();
|
||||
let uuid2 = Uuid::new_v4();
|
||||
let uuid3 = Uuid::new_v4();
|
||||
let to_replace = format!("1 {uuid1} 2 {uuid2} 3 {uuid3} 4");
|
||||
let replaced = UUID_IN_MESSAGE_RE.replace_all(to_replace.as_str(), "[uuid]");
|
||||
assert_eq!(replaced, "1 [uuid] 2 [uuid] 3 [uuid] 4");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,10 +17,10 @@ hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
@@ -125,12 +125,11 @@ impl HeedAuthStore {
|
||||
Action::MetricsAll => {
|
||||
actions.insert(Action::MetricsGet);
|
||||
}
|
||||
Action::ChatsAll => {
|
||||
actions.extend([Action::ChatsGet, Action::ChatsDelete]);
|
||||
}
|
||||
Action::ChatsSettingsAll => {
|
||||
actions.extend([
|
||||
Action::ChatsSettingsGet,
|
||||
Action::ChatsSettingsUpdate,
|
||||
Action::ChatsSettingsDelete,
|
||||
]);
|
||||
actions.extend([Action::ChatsSettingsGet, Action::ChatsSettingsUpdate]);
|
||||
}
|
||||
other => {
|
||||
actions.insert(*other);
|
||||
|
||||
@@ -11,37 +11,38 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.9.0", default-features = false }
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
actix-web = { version = "4.11.0", default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
either = { version = "1.13.0", features = ["serde"] }
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
flate2 = "1.1.2"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
rustc-hash = "2.1.0"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.43"
|
||||
utoipa = { version = "5.3.1", features = ["macros"] }
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tokio = "1.45"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
|
||||
@@ -3,7 +3,7 @@ use serde::Serialize;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::batches::{Batch, BatchId, BatchStats};
|
||||
use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView};
|
||||
use crate::task_view::DetailsView;
|
||||
use crate::tasks::serialize_duration;
|
||||
|
||||
@@ -14,7 +14,7 @@ pub struct BatchView {
|
||||
pub uid: BatchId,
|
||||
pub progress: Option<ProgressView>,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
pub stats: BatchStatsView,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339", default)]
|
||||
@@ -25,13 +25,26 @@ pub struct BatchView {
|
||||
pub batch_strategy: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct BatchStatsView {
|
||||
#[serde(flatten)]
|
||||
pub stats: BatchStats,
|
||||
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
|
||||
pub embedder_requests: EmbedderStatsView,
|
||||
}
|
||||
|
||||
impl BatchView {
|
||||
pub fn from_batch(batch: &Batch) -> Self {
|
||||
Self {
|
||||
uid: batch.uid,
|
||||
progress: batch.progress.clone(),
|
||||
details: batch.details.clone(),
|
||||
stats: batch.stats.clone(),
|
||||
stats: BatchStatsView {
|
||||
stats: batch.stats.clone(),
|
||||
embedder_requests: batch.embedder_stats.clone(),
|
||||
},
|
||||
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::progress::ProgressView;
|
||||
use milli::progress::{EmbedderStats, ProgressView};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use utoipa::ToSchema;
|
||||
@@ -19,6 +19,8 @@ pub struct Batch {
|
||||
pub progress: Option<ProgressView>,
|
||||
pub details: DetailsView,
|
||||
pub stats: BatchStats,
|
||||
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
|
||||
pub embedder_stats: EmbedderStatsView,
|
||||
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub started_at: OffsetDateTime,
|
||||
@@ -43,6 +45,7 @@ impl PartialEq for Batch {
|
||||
progress,
|
||||
details,
|
||||
stats,
|
||||
embedder_stats,
|
||||
started_at,
|
||||
finished_at,
|
||||
enqueued_at,
|
||||
@@ -53,6 +56,7 @@ impl PartialEq for Batch {
|
||||
&& progress.is_none() == other.progress.is_none()
|
||||
&& details == &other.details
|
||||
&& stats == &other.stats
|
||||
&& embedder_stats == &other.embedder_stats
|
||||
&& started_at == &other.started_at
|
||||
&& finished_at == &other.finished_at
|
||||
&& enqueued_at == &other.enqueued_at
|
||||
@@ -83,3 +87,30 @@ pub struct BatchStats {
|
||||
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
|
||||
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct EmbedderStatsView {
|
||||
pub total: usize,
|
||||
pub failed: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub last_error: Option<String>,
|
||||
}
|
||||
|
||||
impl From<&EmbedderStats> for EmbedderStatsView {
|
||||
fn from(stats: &EmbedderStats) -> Self {
|
||||
let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner());
|
||||
Self {
|
||||
total: stats.total_count.load(std::sync::atomic::Ordering::Relaxed),
|
||||
failed: errors.1 as usize,
|
||||
last_error: errors.0.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EmbedderStatsView {
|
||||
pub fn skip_serializing(&self) -> bool {
|
||||
self.total == 0 && self.failed == 0 && self.last_error.is_none()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -301,6 +301,7 @@ InvalidFacetSearchQuery , InvalidRequest , BAD_REQU
|
||||
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
|
||||
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMedia , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -308,6 +309,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -389,8 +391,19 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
|
||||
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
|
||||
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
|
||||
// Export
|
||||
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ;
|
||||
// Experimental features - Chat Completions
|
||||
ChatWorkspaceNotFound , InvalidRequest , NOT_FOUND ;
|
||||
UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
|
||||
UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
|
||||
UnimplementedMultiChoiceChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
|
||||
ChatNotFound , InvalidRequest , NOT_FOUND ;
|
||||
InvalidChatSettingDocumentTemplate , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionOrgId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionProjectId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionApiVersion , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -446,12 +459,14 @@ impl ErrorCode for milli::Error {
|
||||
| UserError::InvalidSettingsDimensions { .. }
|
||||
| UserError::InvalidUrl { .. }
|
||||
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
|
||||
| UserError::InvalidChatSettingsDocumentTemplateMaxBytes
|
||||
| UserError::InvalidPrompt(_)
|
||||
| UserError::InvalidDisableBinaryQuantization { .. }
|
||||
| UserError::InvalidSourceForNested { .. }
|
||||
| UserError::MissingSourceForNested { .. }
|
||||
| UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
|
||||
|
||||
@@ -2,12 +2,13 @@ use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Code, ResponseError};
|
||||
|
||||
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
|
||||
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
|
||||
"Search the database for relevant JSON documents using an optional query.";
|
||||
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
|
||||
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query. You have access to two indexes: movies, steam. The movies index contains movies with overviews. The steam index contains steam games from the Steam platform with their prices";
|
||||
pub const DEFAULT_CHAT_PRE_QUERY_PROMPT: &str = "";
|
||||
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase", default)]
|
||||
@@ -20,6 +21,7 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub get_task_documents_route: bool,
|
||||
pub composite_embedders: bool,
|
||||
pub chat_completions: bool,
|
||||
pub multimodal: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
@@ -59,7 +61,7 @@ pub struct ChatCompletionSettings {
|
||||
#[serde(default)]
|
||||
pub deployment_id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub base_api: Option<String>,
|
||||
pub base_url: Option<String>,
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
@@ -89,16 +91,30 @@ impl ChatCompletionSettings {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> Result<(), ResponseError> {
|
||||
use ChatCompletionSource::*;
|
||||
match self {
|
||||
Self { source: AzureOpenAi, base_url, deployment_id, api_version, .. } if base_url.is_none() || deployment_id.is_none() || api_version.is_none() => Err(ResponseError::from_msg(
|
||||
"azureOpenAi requires setting a valid `baseUrl`, `deploymentId`, and `apiVersion`".to_string(),
|
||||
Code::BadRequest,
|
||||
)),
|
||||
Self { source: VLlm, base_url, .. } if base_url.is_none() => Err(ResponseError::from_msg(
|
||||
"vLlm requires setting a valid `baseUrl`".to_string(),
|
||||
Code::BadRequest,
|
||||
)),
|
||||
_otherwise => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum ChatCompletionSource {
|
||||
#[default]
|
||||
OpenAi,
|
||||
AzureOpenAi,
|
||||
Mistral,
|
||||
Gemini,
|
||||
VLlm,
|
||||
}
|
||||
|
||||
@@ -118,7 +134,6 @@ impl ChatCompletionSource {
|
||||
AzureOpenAi if Self::old_openai_model(model) => System,
|
||||
AzureOpenAi => Developer,
|
||||
Mistral => System,
|
||||
Gemini => System,
|
||||
VLlm => System,
|
||||
}
|
||||
}
|
||||
@@ -128,7 +143,8 @@ impl ChatCompletionSource {
|
||||
/// Old OpenAI models use the system role while new ones use the developer role.
|
||||
fn old_openai_model(model: &str) -> bool {
|
||||
["gpt-3.5", "gpt-4", "gpt-4.1", "gpt-4.5", "gpt-4o", "chatgpt-4o"].iter().any(|old| {
|
||||
model.starts_with(old) && model.chars().nth(old.len()).is_none_or(|last| last == '-')
|
||||
model.starts_with(old)
|
||||
&& model.chars().nth(old.chars().count()).is_none_or(|last| last == '-')
|
||||
})
|
||||
}
|
||||
|
||||
@@ -137,7 +153,6 @@ impl ChatCompletionSource {
|
||||
match self {
|
||||
OpenAi => Some("https://api.openai.com/v1/"),
|
||||
Mistral => Some("https://api.mistral.ai/v1/"),
|
||||
Gemini => Some("https://generativelanguage.googleapis.com/v1beta/openai/"),
|
||||
AzureOpenAi | VLlm => None,
|
||||
}
|
||||
}
|
||||
@@ -150,7 +165,6 @@ pub struct ChatCompletionPrompts {
|
||||
pub search_description: String,
|
||||
pub search_q_param: String,
|
||||
pub search_index_uid_param: String,
|
||||
pub pre_query: String,
|
||||
}
|
||||
|
||||
impl Default for ChatCompletionPrompts {
|
||||
@@ -160,7 +174,6 @@ impl Default for ChatCompletionPrompts {
|
||||
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
|
||||
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
|
||||
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
|
||||
pre_query: DEFAULT_CHAT_PRE_QUERY_PROMPT.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError};
|
||||
|
||||
/// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
|
||||
/// bytes long and optionally ending with a *.
|
||||
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
|
||||
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
|
||||
pub struct IndexUidPattern(String);
|
||||
|
||||
|
||||
@@ -317,19 +317,27 @@ pub enum Action {
|
||||
#[serde(rename = "experimental.update")]
|
||||
#[deserr(rename = "experimental.update")]
|
||||
ExperimentalFeaturesUpdate,
|
||||
#[serde(rename = "export")]
|
||||
#[deserr(rename = "export")]
|
||||
Export,
|
||||
#[serde(rename = "network.get")]
|
||||
#[deserr(rename = "network.get")]
|
||||
NetworkGet,
|
||||
#[serde(rename = "network.update")]
|
||||
#[deserr(rename = "network.update")]
|
||||
NetworkUpdate,
|
||||
// TODO should we rename it chatCompletions.get ?
|
||||
#[serde(rename = "chatCompletion")]
|
||||
#[deserr(rename = "chatCompletion")]
|
||||
#[serde(rename = "chatCompletions")]
|
||||
#[deserr(rename = "chatCompletions")]
|
||||
ChatCompletions,
|
||||
#[serde(rename = "chats.*")]
|
||||
#[deserr(rename = "chats.*")]
|
||||
ChatsAll,
|
||||
#[serde(rename = "chats.get")]
|
||||
#[deserr(rename = "chats.get")]
|
||||
ChatsGet,
|
||||
#[serde(rename = "chats.delete")]
|
||||
#[deserr(rename = "chats.delete")]
|
||||
ChatsDelete,
|
||||
#[serde(rename = "chatsSettings.*")]
|
||||
#[deserr(rename = "chatsSettings.*")]
|
||||
ChatsSettingsAll,
|
||||
@@ -339,9 +347,6 @@ pub enum Action {
|
||||
#[serde(rename = "chatsSettings.update")]
|
||||
#[deserr(rename = "chatsSettings.update")]
|
||||
ChatsSettingsUpdate,
|
||||
#[serde(rename = "chatsSettings.delete")]
|
||||
#[deserr(rename = "chatsSettings.delete")]
|
||||
ChatsSettingsDelete,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
@@ -368,11 +373,12 @@ impl Action {
|
||||
SETTINGS_GET => Some(Self::SettingsGet),
|
||||
SETTINGS_UPDATE => Some(Self::SettingsUpdate),
|
||||
CHAT_COMPLETIONS => Some(Self::ChatCompletions),
|
||||
CHATS_ALL => Some(Self::ChatsAll),
|
||||
CHATS_GET => Some(Self::ChatsGet),
|
||||
CHATS_DELETE => Some(Self::ChatsDelete),
|
||||
CHATS_SETTINGS_ALL => Some(Self::ChatsSettingsAll),
|
||||
CHATS_SETTINGS_GET => Some(Self::ChatsSettingsGet),
|
||||
CHATS_SETTINGS_UPDATE => Some(Self::ChatsSettingsUpdate),
|
||||
CHATS_SETTINGS_DELETE => Some(Self::ChatsSettingsDelete),
|
||||
STATS_ALL => Some(Self::StatsAll),
|
||||
STATS_GET => Some(Self::StatsGet),
|
||||
METRICS_ALL => Some(Self::MetricsAll),
|
||||
@@ -435,13 +441,16 @@ pub mod actions {
|
||||
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
|
||||
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
|
||||
|
||||
pub const EXPORT: u8 = Export.repr();
|
||||
|
||||
pub const NETWORK_GET: u8 = NetworkGet.repr();
|
||||
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
|
||||
|
||||
pub const CHAT_COMPLETIONS: u8 = ChatCompletions.repr();
|
||||
pub const CHATS_ALL: u8 = ChatsAll.repr();
|
||||
pub const CHATS_GET: u8 = ChatsGet.repr();
|
||||
pub const CHATS_DELETE: u8 = ChatsDelete.repr();
|
||||
pub const CHATS_SETTINGS_ALL: u8 = ChatsSettingsAll.repr();
|
||||
pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
|
||||
pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
|
||||
pub const CHATS_SETTINGS_DELETE: u8 = ChatsSettingsDelete.repr();
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ pub mod versioning;
|
||||
pub use milli::{heed, Index};
|
||||
use uuid::Uuid;
|
||||
pub use versioning::VERSION_FILE_NAME;
|
||||
pub use {milli, serde_cs};
|
||||
pub use {byte_unit, milli, serde_cs};
|
||||
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type InstanceUid = Uuid;
|
||||
|
||||
@@ -9,10 +9,11 @@ use std::str::FromStr;
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use fst::IntoStreamer;
|
||||
use milli::disabled_typos_terms::DisabledTyposTerms;
|
||||
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
|
||||
use milli::index::PrefixSearch;
|
||||
use milli::proximity::ProximityPrecision;
|
||||
pub use milli::update::ChatSettings;
|
||||
use milli::update::Setting;
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use utoipa::ToSchema;
|
||||
@@ -500,8 +501,11 @@ impl Settings<Unchecked> {
|
||||
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
||||
for (name, config) in configs.iter_mut() {
|
||||
let config_to_check = std::mem::take(config);
|
||||
let checked_config =
|
||||
milli::update::validate_embedding_settings(config_to_check.inner, name)?;
|
||||
let checked_config = milli::update::validate_embedding_settings(
|
||||
config_to_check.inner,
|
||||
name,
|
||||
milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate,
|
||||
)?;
|
||||
*config = SettingEmbeddingSettings { inner: checked_config };
|
||||
}
|
||||
self.embedders = Setting::Set(configs);
|
||||
@@ -697,7 +701,7 @@ pub fn apply_settings_to_builder(
|
||||
match typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
Setting::Set(val) => builder.set_authorize_typos(val),
|
||||
Setting::Reset => builder.reset_authorize_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@@ -751,6 +755,7 @@ pub fn apply_settings_to_builder(
|
||||
builder.reset_min_word_len_two_typos();
|
||||
builder.reset_exact_words();
|
||||
builder.reset_exact_attributes();
|
||||
builder.reset_disable_on_numbers();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@@ -910,6 +915,7 @@ pub fn settings(
|
||||
};
|
||||
|
||||
let embedders: BTreeMap<_, _> = index
|
||||
.embedding_configs()
|
||||
.embedding_configs(rtxn)?
|
||||
.into_iter()
|
||||
.map(|IndexEmbeddingConfig { name, config, .. }| {
|
||||
@@ -968,6 +974,7 @@ pub fn settings(
|
||||
if let SecretPolicy::HideSecrets = secret_policy {
|
||||
settings.hide_secrets()
|
||||
}
|
||||
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use byte_unit::UnitType;
|
||||
use milli::Object;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
@@ -6,7 +9,9 @@ use utoipa::ToSchema;
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -118,6 +123,15 @@ pub struct DetailsView {
|
||||
pub upgrade_from: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub upgrade_to: Option<String>,
|
||||
// exporting
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub url: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub api_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub payload_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@@ -238,6 +252,34 @@ impl DetailsView {
|
||||
Some(left)
|
||||
}
|
||||
},
|
||||
url: match (self.url.clone(), other.url.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(url)) | (Some(url), None) => Some(url),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
api_key: match (self.api_key.clone(), other.api_key.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(key)) | (Some(key), None) => Some(key),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
indexes: match (self.indexes.clone(), other.indexes.clone()) {
|
||||
(None, None) => None,
|
||||
(None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
|
||||
// We should never be able to batch multiple exports at the same time.
|
||||
// So we return the first one we encounter but that shouldn't be an issue anyway.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
// We want the earliest version
|
||||
upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
|
||||
(None, None) => None,
|
||||
@@ -327,6 +369,22 @@ impl From<Details> for DetailsView {
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
Details::Export { url, api_key, payload_size, indexes } => DetailsView {
|
||||
url: Some(url),
|
||||
api_key: api_key.map(|mut api_key| {
|
||||
hide_secret(&mut api_key);
|
||||
api_key
|
||||
}),
|
||||
payload_size: payload_size
|
||||
.map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
indexes: Some(
|
||||
indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, settings)| (pattern.to_string(), settings))
|
||||
.collect(),
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
Details::UpgradeDatabase { from, to } => DetailsView {
|
||||
upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
|
||||
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
|
||||
@@ -335,3 +393,21 @@ impl From<Details> for DetailsView {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We definitely need to factorize the code to hide the secret key
|
||||
fn hide_secret(secret: &mut String) {
|
||||
match secret.len() {
|
||||
x if x < 10 => {
|
||||
secret.replace_range(.., "XXX...");
|
||||
}
|
||||
x if x < 20 => {
|
||||
secret.replace_range(2.., "XXXX...");
|
||||
}
|
||||
x if x < 30 => {
|
||||
secret.replace_range(3.., "XXXXX...");
|
||||
}
|
||||
_x => {
|
||||
secret.replace_range(5.., "XXXXXX...");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
use core::fmt;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::fmt::{Display, Write};
|
||||
use std::str::FromStr;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use enum_iterator::Sequence;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use milli::Object;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use serde_json::Value;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use utoipa::ToSchema;
|
||||
use utoipa::{schema, ToSchema};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::batches::BatchId;
|
||||
use crate::error::ResponseError;
|
||||
use crate::index_uid_pattern::IndexUidPattern;
|
||||
use crate::keys::Key;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::{versioning, InstanceUid};
|
||||
@@ -50,6 +53,7 @@ impl Task {
|
||||
| SnapshotCreation
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
@@ -86,6 +90,7 @@ impl Task {
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. } => None,
|
||||
}
|
||||
}
|
||||
@@ -108,11 +113,11 @@ pub enum KindWithContent {
|
||||
},
|
||||
DocumentDeletionByFilter {
|
||||
index_uid: String,
|
||||
filter_expr: serde_json::Value,
|
||||
filter_expr: Value,
|
||||
},
|
||||
DocumentEdition {
|
||||
index_uid: String,
|
||||
filter_expr: Option<serde_json::Value>,
|
||||
filter_expr: Option<Value>,
|
||||
context: Option<milli::Object>,
|
||||
function: String,
|
||||
},
|
||||
@@ -152,6 +157,12 @@ pub enum KindWithContent {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
@@ -163,6 +174,13 @@ pub struct IndexSwap {
|
||||
pub indexes: (String, String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExportIndexSettings {
|
||||
pub filter: Option<Value>,
|
||||
pub override_settings: bool,
|
||||
}
|
||||
|
||||
impl KindWithContent {
|
||||
pub fn as_kind(&self) -> Kind {
|
||||
match self {
|
||||
@@ -180,6 +198,7 @@ impl KindWithContent {
|
||||
KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
|
||||
KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
|
||||
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
}
|
||||
}
|
||||
@@ -192,6 +211,7 @@ impl KindWithContent {
|
||||
| SnapshotCreation
|
||||
| TaskCancelation { .. }
|
||||
| TaskDeletion { .. }
|
||||
| Export { .. }
|
||||
| UpgradeDatabase { .. } => vec![],
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentEdition { index_uid, .. }
|
||||
@@ -269,6 +289,14 @@ impl KindWithContent {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: (from.0, from.1, from.2),
|
||||
to: (
|
||||
@@ -335,6 +363,14 @@ impl KindWithContent {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: *from,
|
||||
to: (
|
||||
@@ -383,6 +419,14 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
}),
|
||||
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
|
||||
KindWithContent::SnapshotCreation => None,
|
||||
KindWithContent::Export { url, api_key, payload_size, indexes } => {
|
||||
Some(Details::Export {
|
||||
url: url.clone(),
|
||||
api_key: api_key.clone(),
|
||||
payload_size: *payload_size,
|
||||
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
|
||||
})
|
||||
}
|
||||
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
|
||||
from: *from,
|
||||
to: (
|
||||
@@ -499,6 +543,7 @@ pub enum Kind {
|
||||
TaskDeletion,
|
||||
DumpCreation,
|
||||
SnapshotCreation,
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
}
|
||||
|
||||
@@ -516,6 +561,7 @@ impl Kind {
|
||||
| Kind::TaskCancelation
|
||||
| Kind::TaskDeletion
|
||||
| Kind::DumpCreation
|
||||
| Kind::Export
|
||||
| Kind::UpgradeDatabase
|
||||
| Kind::SnapshotCreation => false,
|
||||
}
|
||||
@@ -536,6 +582,7 @@ impl Display for Kind {
|
||||
Kind::TaskDeletion => write!(f, "taskDeletion"),
|
||||
Kind::DumpCreation => write!(f, "dumpCreation"),
|
||||
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
}
|
||||
}
|
||||
@@ -568,6 +615,8 @@ impl FromStr for Kind {
|
||||
Ok(Kind::DumpCreation)
|
||||
} else if kind.eq_ignore_ascii_case("snapshotCreation") {
|
||||
Ok(Kind::SnapshotCreation)
|
||||
} else if kind.eq_ignore_ascii_case("export") {
|
||||
Ok(Kind::Export)
|
||||
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else {
|
||||
@@ -643,12 +692,33 @@ pub enum Details {
|
||||
IndexSwap {
|
||||
swaps: Vec<IndexSwap>,
|
||||
},
|
||||
Export {
|
||||
url: String,
|
||||
api_key: Option<String>,
|
||||
payload_size: Option<Byte>,
|
||||
indexes: BTreeMap<IndexUidPattern, DetailsExportIndexSettings>,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct DetailsExportIndexSettings {
|
||||
#[serde(flatten)]
|
||||
pub settings: ExportIndexSettings,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_documents: Option<u64>,
|
||||
}
|
||||
|
||||
impl From<ExportIndexSettings> for DetailsExportIndexSettings {
|
||||
fn from(settings: ExportIndexSettings) -> Self {
|
||||
DetailsExportIndexSettings { settings, matched_documents: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Details {
|
||||
pub fn to_failed(&self) -> Self {
|
||||
let mut details = self.clone();
|
||||
@@ -667,6 +737,7 @@ impl Details {
|
||||
Self::SettingsUpdate { .. }
|
||||
| Self::IndexInfo { .. }
|
||||
| Self::Dump { .. }
|
||||
| Self::Export { .. }
|
||||
| Self::UpgradeDatabase { .. }
|
||||
| Self::IndexSwap { .. } => (),
|
||||
}
|
||||
|
||||
@@ -13,52 +13,50 @@ license.workspace = true
|
||||
default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.0"
|
||||
actix-http = { version = "3.9.0", default-features = false, features = [
|
||||
actix-cors = "0.7.1"
|
||||
actix-http = { version = "3.11.0", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.9.0", default-features = false, features = [
|
||||
actix-web = { version = "4.11.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
anyhow = { version = "1.0.95", features = ["backtrace"] }
|
||||
async-trait = "0.1.85"
|
||||
bstr = "1.11.3"
|
||||
anyhow = { version = "1.0.98", features = ["backtrace"] }
|
||||
bstr = "1.12.0"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
bytes = "1.9.0"
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.24", features = ["derive", "env"] }
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.13.0"
|
||||
either = "1.15.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.35"
|
||||
flate2 = "1.1.2"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.7.0", features = ["serde"] }
|
||||
is-terminal = "0.4.13"
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
is-terminal = "0.4.16"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
liquid = "0.26.9"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.16.0"
|
||||
num_cpus = "1.17.0"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.20.2"
|
||||
ordered-float = "4.6.0"
|
||||
parking_lot = "0.12.3"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
parking_lot = "0.12.4"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.16"
|
||||
platform-dirs = "0.3.0"
|
||||
@@ -66,44 +64,44 @@ prometheus = { version = "0.14.0", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.12", features = [
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = { version = "0.23.20", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
|
||||
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
|
||||
rustls-pemfile = "2.2.0"
|
||||
segment = { version = "0.2.5" }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
segment = { version = "0.2.6" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
siphasher = "1.0.1"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.4", optional = true }
|
||||
sysinfo = "0.33.1"
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
sysinfo = "0.35.2"
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.43.1", features = ["full"] }
|
||||
toml = "0.8.19"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.19", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.15"
|
||||
tracing-actix-web = "0.7.18"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
roaring = "0.10.10"
|
||||
roaring = "0.10.12"
|
||||
mopa-maintained = "0.2.3"
|
||||
utoipa = { version = "5.3.1", features = [
|
||||
utoipa = { version = "5.4.0", features = [
|
||||
"actix_extras",
|
||||
"macros",
|
||||
"non_strict_integers",
|
||||
@@ -119,29 +117,29 @@ actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
brotli = "6.0.0"
|
||||
brotli = "8.0.1"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
insta = { version = "=1.39.0", features = ["redactions"] }
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
urlencoding = "2.1.3"
|
||||
wiremock = "0.6.2"
|
||||
wiremock = "0.6.3"
|
||||
yaup = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.95", optional = true }
|
||||
cargo_toml = { version = "0.21.0", optional = true }
|
||||
anyhow = { version = "1.0.98", optional = true }
|
||||
cargo_toml = { version = "0.22.1", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.12.12", features = [
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.4", optional = true }
|
||||
tempfile = { version = "3.15.0", optional = true }
|
||||
zip = { version = "2.3.0", optional = true }
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
tempfile = { version = "3.20.0", optional = true }
|
||||
zip = { version = "4.1.0", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
@@ -171,5 +169,5 @@ german = ["meilisearch-types/german"]
|
||||
turkish = ["meilisearch-types/turkish"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
|
||||
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
|
||||
sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"
|
||||
|
||||
@@ -197,11 +197,13 @@ struct Infos {
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
experimental_composite_embedders: bool,
|
||||
experimental_embedding_cache_entries: usize,
|
||||
experimental_no_snapshot_compaction: bool,
|
||||
experimental_no_edition_2024_for_settings: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@@ -286,8 +288,12 @@ impl Infos {
|
||||
ScheduleSnapshot::Enabled(interval) => Some(interval),
|
||||
};
|
||||
|
||||
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
|
||||
indexer_options;
|
||||
let IndexerOpts {
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
} = indexer_options;
|
||||
|
||||
let RuntimeTogglableFeatures {
|
||||
metrics,
|
||||
@@ -298,6 +304,7 @@ impl Infos {
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
@@ -317,6 +324,7 @@ impl Infos {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_chat_completions: chat_completions,
|
||||
experimental_multimodal: multimodal,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
experimental_embedding_cache_entries,
|
||||
@@ -350,6 +358,7 @@ impl Infos {
|
||||
ssl_require_auth,
|
||||
ssl_resumption,
|
||||
ssl_tickets,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
|
||||
MissingSearchHybrid,
|
||||
#[error("Invalid request: both `media` and `vector` parameters are present.")]
|
||||
MediaAndVector,
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
@@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||
MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
|
||||
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
|
||||
Code::InvalidMultiSearchFederationOptions
|
||||
}
|
||||
|
||||
@@ -37,7 +37,10 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use meilisearch_auth::{open_auth_store_env, AuthController};
|
||||
use meilisearch_types::milli::constants::VERSION_MAJOR;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::{
|
||||
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
|
||||
};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::versioning::{
|
||||
@@ -461,6 +464,7 @@ fn import_dump(
|
||||
index_scheduler: &mut IndexScheduler,
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let progress = Progress::default();
|
||||
let reader = File::open(dump_path)?;
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
@@ -494,19 +498,37 @@ fn import_dump(
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
// 3. Import the runtime features and network
|
||||
// 3. Import the `ChatCompletionSettings`s.
|
||||
for result in dump_reader.chat_completions_settings()? {
|
||||
let (name, settings) = result?;
|
||||
index_scheduler.put_chat_settings(&name, &settings)?;
|
||||
}
|
||||
|
||||
// 4. Import the runtime features and network
|
||||
let features = dump_reader.features()?.unwrap_or_default();
|
||||
index_scheduler.put_runtime_features(features)?;
|
||||
|
||||
let network = dump_reader.network()?.cloned().unwrap_or_default();
|
||||
index_scheduler.put_network(network)?;
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
// 4.1 Use all cpus to process dump if `max_indexing_threads` not configured
|
||||
let backup_config;
|
||||
let base_config = index_scheduler.indexer_config();
|
||||
|
||||
let indexer_config = if base_config.max_threads.is_none() {
|
||||
let (thread_pool, _) = default_thread_pool_and_threads();
|
||||
|
||||
let _config = IndexerConfig { thread_pool, ..*base_config };
|
||||
backup_config = _config;
|
||||
&backup_config
|
||||
} else {
|
||||
base_config
|
||||
};
|
||||
|
||||
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
||||
// try to process tasks while we're trying to import the indexes.
|
||||
|
||||
// 4. Import the indexes.
|
||||
// 5. Import the indexes.
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
@@ -519,20 +541,20 @@ fn import_dump(
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
|
||||
// 4.1 Import the primary key if there is one.
|
||||
// 5.1 Import the primary key if there is one.
|
||||
if let Some(ref primary_key) = metadata.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
// 4.2 Import the settings.
|
||||
// 5.2 Import the settings.
|
||||
tracing::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder
|
||||
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
||||
let embedder_stats: Arc<EmbedderStats> = Default::default();
|
||||
builder.execute(&|| false, &progress, embedder_stats.clone())?;
|
||||
|
||||
// 4.3 Import the documents.
|
||||
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
// 5.3 Import the documents.
|
||||
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
@@ -543,11 +565,11 @@ fn import_dump(
|
||||
// This flush the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 4.3.2 We feed it to the milli index.
|
||||
// 5.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs(&wtxn)?;
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
@@ -560,6 +582,7 @@ fn import_dump(
|
||||
},
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
&embedder_stats,
|
||||
)?;
|
||||
|
||||
let builder = builder.with_embedders(embedders);
|
||||
@@ -574,15 +597,15 @@ fn import_dump(
|
||||
index_scheduler.refresh_index_stats(&uid)?;
|
||||
}
|
||||
|
||||
// 5. Import the queue
|
||||
// 6. Import the queue
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
// 5.1. Import the batches
|
||||
// 6.1. Import the batches
|
||||
for ret in dump_reader.batches()? {
|
||||
let batch = ret?;
|
||||
index_scheduler_dump.register_dumped_batch(batch)?;
|
||||
}
|
||||
|
||||
// 5.2. Import the tasks
|
||||
// 6.2. Import the tasks
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler_dump.register_dumped_task(task, file)?;
|
||||
|
||||
@@ -15,6 +15,33 @@ lazy_static! {
|
||||
"Meilisearch number of degraded search requests"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
|
||||
opts!(
|
||||
"meilisearch_chat_search_requests",
|
||||
"Meilisearch number of search requests performed by the chat route itself"
|
||||
),
|
||||
&["type"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
|
||||
opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
|
||||
register_int_counter_vec!(
|
||||
opts!(
|
||||
"meilisearch_chat_completion_tokens_usage",
|
||||
"Meilisearch Chat Completion Tokens Usage"
|
||||
),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
|
||||
opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
|
||||
&["workspace", "model"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
|
||||
.expect("Can't create a metric");
|
||||
|
||||
@@ -53,6 +53,8 @@ const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_U
|
||||
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
||||
@@ -62,7 +64,7 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
|
||||
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
|
||||
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE";
|
||||
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
|
||||
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
|
||||
@@ -749,22 +751,43 @@ pub struct IndexerOpts {
|
||||
#[clap(skip)]
|
||||
#[serde(skip)]
|
||||
pub skip_index_budget: bool,
|
||||
|
||||
/// Experimental no edition 2024 for settings feature. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/847>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 for settings feature.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_settings: bool,
|
||||
}
|
||||
|
||||
impl IndexerOpts {
|
||||
/// Exports the values to their corresponding env vars if they are not set.
|
||||
pub fn export_to_env(self) {
|
||||
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
|
||||
let IndexerOpts {
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
} = self;
|
||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_MEMORY,
|
||||
max_indexing_memory.to_string(),
|
||||
);
|
||||
}
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_THREADS,
|
||||
max_indexing_threads.0.to_string(),
|
||||
);
|
||||
if let Some(max_indexing_threads) = max_indexing_threads.0 {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_MAX_INDEXING_THREADS,
|
||||
max_indexing_threads.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_settings {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS,
|
||||
experimental_no_edition_2024_for_settings.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -772,18 +795,23 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
|
||||
let thread_pool = ThreadPoolNoAbortBuilder::new()
|
||||
.thread_name(|index| format!("indexing-thread:{index}"))
|
||||
.num_threads(*other.max_indexing_threads)
|
||||
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
|
||||
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
|
||||
.build()?;
|
||||
|
||||
Ok(Self {
|
||||
thread_pool,
|
||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
|
||||
thread_pool: Some(thread_pool),
|
||||
max_threads: *other.max_indexing_threads,
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
..Default::default()
|
||||
experimental_no_edition_2024_for_settings: other
|
||||
.experimental_no_edition_2024_for_settings,
|
||||
chunk_compression_type: Default::default(),
|
||||
chunk_compression_level: Default::default(),
|
||||
documents_chunk_size: Default::default(),
|
||||
max_nb_chunks: Default::default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -843,31 +871,31 @@ fn total_memory_bytes() -> Option<u64> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
|
||||
pub struct MaxThreads(usize);
|
||||
#[derive(Default, Debug, Clone, Copy, Deserialize, Serialize)]
|
||||
pub struct MaxThreads(Option<usize>);
|
||||
|
||||
impl FromStr for MaxThreads {
|
||||
type Err = ParseIntError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
usize::from_str(s).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MaxThreads {
|
||||
fn default() -> Self {
|
||||
MaxThreads(num_cpus::get() / 2)
|
||||
fn from_str(s: &str) -> Result<MaxThreads, Self::Err> {
|
||||
if s.is_empty() || s == "unlimited" {
|
||||
return Ok(MaxThreads::default());
|
||||
}
|
||||
usize::from_str(s).map(Some).map(MaxThreads)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MaxThreads {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
match self.0 {
|
||||
Some(threads) => write!(f, "{}", threads),
|
||||
None => write!(f, "unlimited"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MaxThreads {
|
||||
type Target = usize;
|
||||
type Target = Option<usize>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
|
||||
@@ -1,291 +0,0 @@
|
||||
use std::mem;
|
||||
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{Either, HttpResponse, Responder};
|
||||
use actix_web_lab::sse::{self, Event};
|
||||
use async_openai::config::OpenAIConfig;
|
||||
use async_openai::types::{
|
||||
ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage,
|
||||
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
|
||||
ChatCompletionToolArgs, ChatCompletionToolType, CreateChatCompletionRequest, FinishReason,
|
||||
FunctionObjectArgs,
|
||||
};
|
||||
use async_openai::Client;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||
use meilisearch_types::milli::prompt::PromptData;
|
||||
use meilisearch_types::milli::vector::EmbeddingConfig;
|
||||
use meilisearch_types::{Document, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, RetrieveVectors, SearchQuery, SemanticRatio,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
/// The default description of the searchInIndex tool provided to OpenAI.
|
||||
const DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION: &str =
|
||||
"Search the database for relevant JSON documents using an optional query.";
|
||||
/// The default description of the searchInIndex `q` parameter tool provided to OpenAI.
|
||||
const DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION: &str =
|
||||
"The search query string used to find relevant documents in the index. \
|
||||
This should contain keywords or phrases that best represent what the user is looking for. \
|
||||
More specific queries will yield more precise results.";
|
||||
/// The default description of the searchInIndex `index` parameter tool provided to OpenAI.
|
||||
const DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION: &str =
|
||||
"The name of the index to search within. An index is a collection of documents organized for search. \
|
||||
Selecting the right index ensures the most relevant results for the user query";
|
||||
|
||||
const EMBEDDER_NAME: &str = "openai";
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(chat)));
|
||||
}
|
||||
|
||||
/// Get a chat completion
|
||||
async fn chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
web::Json(mut chat_completion): web::Json<CreateChatCompletionRequest>,
|
||||
) -> impl Responder {
|
||||
// To enable later on, when the feature will be experimental
|
||||
// index_scheduler.features().check_chat("Using the /chat route")?;
|
||||
|
||||
if chat_completion.stream.unwrap_or(false) {
|
||||
Either::Right(streamed_chat(index_scheduler, search_queue, chat_completion).await)
|
||||
} else {
|
||||
Either::Left(non_streamed_chat(index_scheduler, search_queue, chat_completion).await)
|
||||
}
|
||||
}
|
||||
|
||||
async fn non_streamed_chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
mut chat_completion: CreateChatCompletionRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let api_key = std::env::var("MEILI_OPENAI_API_KEY")
|
||||
.expect("cannot find OpenAI API Key (MEILI_OPENAI_API_KEY)");
|
||||
let config = OpenAIConfig::default().with_api_key(&api_key); // we can also change the API base
|
||||
let client = Client::with_config(config);
|
||||
|
||||
assert_eq!(
|
||||
chat_completion.n.unwrap_or(1),
|
||||
1,
|
||||
"Meilisearch /chat only support one completion at a time (n = 1, n = null)"
|
||||
);
|
||||
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let search_in_index_description = index_scheduler
|
||||
.chat_prompts(&rtxn, "searchInIndex-description")
|
||||
.unwrap()
|
||||
.unwrap_or(DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION)
|
||||
.to_string();
|
||||
let search_in_index_q_param_description = index_scheduler
|
||||
.chat_prompts(&rtxn, "searchInIndex-q-param-description")
|
||||
.unwrap()
|
||||
.unwrap_or(DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION)
|
||||
.to_string();
|
||||
let search_in_index_index_description = index_scheduler
|
||||
.chat_prompts(&rtxn, "searchInIndex-index-param-description")
|
||||
.unwrap()
|
||||
.unwrap_or(DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION)
|
||||
.to_string();
|
||||
drop(rtxn);
|
||||
|
||||
let mut response;
|
||||
loop {
|
||||
let tools = chat_completion.tools.get_or_insert_default();
|
||||
tools.push(
|
||||
ChatCompletionToolArgs::default()
|
||||
.r#type(ChatCompletionToolType::Function)
|
||||
.function(
|
||||
FunctionObjectArgs::default()
|
||||
.name("searchInIndex")
|
||||
.description(&search_in_index_description)
|
||||
.parameters(json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index_uid": {
|
||||
"type": "string",
|
||||
"enum": ["main"],
|
||||
"description": search_in_index_index_description,
|
||||
},
|
||||
"q": {
|
||||
"type": ["string", "null"],
|
||||
"description": search_in_index_q_param_description,
|
||||
}
|
||||
},
|
||||
"required": ["index_uid", "q"],
|
||||
"additionalProperties": false,
|
||||
}))
|
||||
.strict(true)
|
||||
.build()
|
||||
.unwrap(),
|
||||
)
|
||||
.build()
|
||||
.unwrap(),
|
||||
);
|
||||
response = client.chat().create(chat_completion.clone()).await.unwrap();
|
||||
|
||||
let choice = &mut response.choices[0];
|
||||
match choice.finish_reason {
|
||||
Some(FinishReason::ToolCalls) => {
|
||||
let tool_calls = mem::take(&mut choice.message.tool_calls).unwrap_or_default();
|
||||
|
||||
let (meili_calls, other_calls): (Vec<_>, Vec<_>) =
|
||||
tool_calls.into_iter().partition(|call| call.function.name == "searchInIndex");
|
||||
|
||||
chat_completion.messages.push(
|
||||
ChatCompletionRequestAssistantMessageArgs::default()
|
||||
.tool_calls(meili_calls.clone())
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
);
|
||||
|
||||
for call in meili_calls {
|
||||
let SearchInIndexParameters { index_uid, q } =
|
||||
serde_json::from_str(&call.function.arguments).unwrap();
|
||||
|
||||
let mut query = SearchQuery {
|
||||
q,
|
||||
hybrid: Some(HybridQuery {
|
||||
semantic_ratio: SemanticRatio::default(),
|
||||
embedder: EMBEDDER_NAME.to_string(),
|
||||
}),
|
||||
limit: 20,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) =
|
||||
index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query.filter, search_rules);
|
||||
}
|
||||
|
||||
// TBD
|
||||
// let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let search_kind = search_kind(
|
||||
&query,
|
||||
index_scheduler.get_ref(),
|
||||
index_uid.to_string(),
|
||||
&index,
|
||||
)?;
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let features = index_scheduler.features();
|
||||
let index_cloned = index.clone();
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
&index_cloned,
|
||||
query,
|
||||
search_kind,
|
||||
RetrieveVectors::new(false),
|
||||
features,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
|
||||
let search_result = search_result?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
// aggregate.succeed(search_result);
|
||||
if search_result.degraded {
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
}
|
||||
// analytics.publish(aggregate, &req);
|
||||
|
||||
let search_result = search_result?;
|
||||
let formatted = format_documents(
|
||||
&index,
|
||||
search_result.hits.into_iter().map(|doc| doc.document),
|
||||
);
|
||||
let text = formatted.join("\n");
|
||||
chat_completion.messages.push(ChatCompletionRequestMessage::Tool(
|
||||
ChatCompletionRequestToolMessage {
|
||||
tool_call_id: call.id,
|
||||
content: ChatCompletionRequestToolMessageContent::Text(text),
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// Let the client call other tools by themselves
|
||||
if !other_calls.is_empty() {
|
||||
response.choices[0].message.tool_calls = Some(other_calls);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
||||
|
||||
async fn streamed_chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
mut chat_completion: CreateChatCompletionRequest,
|
||||
) -> impl Responder {
|
||||
assert!(chat_completion.stream.unwrap_or(false));
|
||||
|
||||
let api_key = std::env::var("MEILI_OPENAI_API_KEY")
|
||||
.expect("cannot find OpenAI API Key (MEILI_OPENAI_API_KEY)");
|
||||
let config = OpenAIConfig::default().with_api_key(&api_key); // we can also change the API base
|
||||
let client = Client::with_config(config);
|
||||
let response = client.chat().create_stream(chat_completion).await.unwrap();
|
||||
actix_web_lab::sse::Sse::from_stream(response.map(|response| {
|
||||
response
|
||||
.map(|mut r| Event::Data(sse::Data::new_json(r.choices.pop().unwrap().delta).unwrap()))
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SearchInIndexParameters {
|
||||
/// The index uid to search in.
|
||||
index_uid: String,
|
||||
/// The query parameter to use.
|
||||
q: Option<String>,
|
||||
}
|
||||
|
||||
fn format_documents(index: &Index, documents: impl Iterator<Item = Document>) -> Vec<String> {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let IndexEmbeddingConfig { name: _, config, user_provided: _ } = index
|
||||
.embedding_configs(&rtxn)
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.find(|conf| conf.name == EMBEDDER_NAME)
|
||||
.unwrap();
|
||||
|
||||
let EmbeddingConfig {
|
||||
embedder_options: _,
|
||||
prompt: PromptData { template, max_bytes },
|
||||
quantized: _,
|
||||
} = config;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct Doc<T: Serialize> {
|
||||
doc: T,
|
||||
}
|
||||
|
||||
let template = liquid::ParserBuilder::with_stdlib().build().unwrap().parse(&template).unwrap();
|
||||
documents
|
||||
.map(|doc| {
|
||||
let object = liquid::to_object(&Doc { doc }).unwrap();
|
||||
template.render(&object).unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
135
crates/meilisearch/src/routes/chats/chat_completion_analytics.rs
Normal file
135
crates/meilisearch/src/routes/chats/chat_completion_analytics.rs
Normal file
@@ -0,0 +1,135 @@
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::analytics::Aggregate;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ChatCompletionAggregator {
|
||||
// requests
|
||||
total_received: usize,
|
||||
total_succeeded: usize,
|
||||
time_spent: BinaryHeap<usize>,
|
||||
|
||||
// chat completion specific metrics
|
||||
total_messages: usize,
|
||||
total_streamed_requests: usize,
|
||||
total_non_streamed_requests: usize,
|
||||
|
||||
// model usage tracking
|
||||
models_used: std::collections::HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl ChatCompletionAggregator {
|
||||
pub fn from_request(model: &str, message_count: usize, is_stream: bool) -> Self {
|
||||
let mut models_used = std::collections::HashMap::new();
|
||||
models_used.insert(model.to_string(), 1);
|
||||
|
||||
Self {
|
||||
total_received: 1,
|
||||
total_succeeded: 0,
|
||||
time_spent: BinaryHeap::new(),
|
||||
|
||||
total_messages: message_count,
|
||||
total_streamed_requests: if is_stream { 1 } else { 0 },
|
||||
total_non_streamed_requests: if is_stream { 0 } else { 1 },
|
||||
|
||||
models_used,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, time_spent: std::time::Duration) {
|
||||
self.total_succeeded += 1;
|
||||
self.time_spent.push(time_spent.as_millis() as usize);
|
||||
}
|
||||
}
|
||||
|
||||
impl Aggregate for ChatCompletionAggregator {
|
||||
fn event_name(&self) -> &'static str {
|
||||
"Chat Completion POST"
|
||||
}
|
||||
|
||||
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||
let Self {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
mut time_spent,
|
||||
total_messages,
|
||||
total_streamed_requests,
|
||||
total_non_streamed_requests,
|
||||
models_used,
|
||||
..
|
||||
} = *new;
|
||||
|
||||
// Aggregate time spent
|
||||
self.time_spent.append(&mut time_spent);
|
||||
|
||||
// Aggregate counters
|
||||
self.total_received = self.total_received.saturating_add(total_received);
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||
self.total_messages = self.total_messages.saturating_add(total_messages);
|
||||
self.total_streamed_requests =
|
||||
self.total_streamed_requests.saturating_add(total_streamed_requests);
|
||||
self.total_non_streamed_requests =
|
||||
self.total_non_streamed_requests.saturating_add(total_non_streamed_requests);
|
||||
|
||||
// Aggregate model usage
|
||||
for (model, count) in models_used {
|
||||
*self.models_used.entry(model).or_insert(0) += count;
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
fn into_event(self: Box<Self>) -> Value {
|
||||
let Self {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
time_spent,
|
||||
total_messages,
|
||||
total_streamed_requests,
|
||||
total_non_streamed_requests,
|
||||
models_used,
|
||||
..
|
||||
} = *self;
|
||||
|
||||
// Compute time statistics
|
||||
let time_spent: Vec<usize> = time_spent.into_sorted_vec();
|
||||
let (max_time, min_time, avg_time) = if time_spent.is_empty() {
|
||||
(0, 0, 0)
|
||||
} else {
|
||||
let max_time = time_spent.last().unwrap_or(&0);
|
||||
let min_time = time_spent.first().unwrap_or(&0);
|
||||
let sum: usize = time_spent.iter().sum();
|
||||
let avg_time = sum / time_spent.len();
|
||||
(*max_time, *min_time, avg_time)
|
||||
};
|
||||
|
||||
// Compute average messages per request
|
||||
let avg_messages_per_request =
|
||||
if total_received > 0 { total_messages as f64 / total_received as f64 } else { 0.0 };
|
||||
|
||||
// Compute streaming vs non-streaming proportions
|
||||
let streaming_ratio = if total_received > 0 {
|
||||
total_streamed_requests as f64 / total_received as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
json!({
|
||||
"total_received": total_received,
|
||||
"total_succeeded": total_succeeded,
|
||||
"time_spent": {
|
||||
"max": max_time,
|
||||
"min": min_time,
|
||||
"avg": avg_time
|
||||
},
|
||||
"total_messages": total_messages,
|
||||
"avg_messages_per_request": avg_messages_per_request,
|
||||
"total_streamed_requests": total_streamed_requests,
|
||||
"total_non_streamed_requests": total_non_streamed_requests,
|
||||
"streaming_ratio": streaming_ratio,
|
||||
"models_used": models_used,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -13,9 +13,9 @@ use async_openai::types::{
|
||||
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
|
||||
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
|
||||
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
|
||||
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
|
||||
CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
|
||||
FunctionCallStream, FunctionObjectArgs,
|
||||
ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs,
|
||||
ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
|
||||
FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs,
|
||||
};
|
||||
use async_openai::Client;
|
||||
use bumpalo::Bump;
|
||||
@@ -24,8 +24,8 @@ use index_scheduler::IndexScheduler;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings as DbChatSettings,
|
||||
SystemRole,
|
||||
ChatCompletionPrompts as DbChatCompletionPrompts,
|
||||
ChatCompletionSource as DbChatCompletionSource, SystemRole,
|
||||
};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::index::ChatConfig;
|
||||
@@ -36,17 +36,23 @@ use serde_json::json;
|
||||
use tokio::runtime::Handle;
|
||||
use tokio::sync::mpsc::error::SendError;
|
||||
|
||||
use super::chat_completion_analytics::ChatCompletionAggregator;
|
||||
use super::config::Config;
|
||||
use super::errors::StreamErrorEvent;
|
||||
use super::errors::{MistralError, OpenAiOutsideError, StreamErrorEvent};
|
||||
use super::utils::format_documents;
|
||||
use super::{
|
||||
ChatsParam, MEILI_APPEND_CONVERSATION_MESSAGE_NAME, MEILI_SEARCH_IN_INDEX_FUNCTION_NAME,
|
||||
MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME,
|
||||
};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::metrics::{
|
||||
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
|
||||
MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
|
||||
};
|
||||
use crate::routes::chats::utils::SseEventSender;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{add_search_rules, prepare_search, search_from_kind, SearchQuery};
|
||||
@@ -64,15 +70,10 @@ async fn chat(
|
||||
req: HttpRequest,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
web::Json(chat_completion): web::Json<CreateChatCompletionRequest>,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> impl Responder {
|
||||
let ChatsParam { workspace_uid } = chats_param.into_inner();
|
||||
|
||||
assert_eq!(
|
||||
chat_completion.n.unwrap_or(1),
|
||||
1,
|
||||
"Meilisearch /chat only support one completion at a time (n = 1, n = null)"
|
||||
);
|
||||
|
||||
if chat_completion.stream.unwrap_or(false) {
|
||||
Either::Right(
|
||||
streamed_chat(
|
||||
@@ -82,6 +83,7 @@ async fn chat(
|
||||
&workspace_uid,
|
||||
req,
|
||||
chat_completion,
|
||||
analytics,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
@@ -94,6 +96,7 @@ async fn chat(
|
||||
&workspace_uid,
|
||||
req,
|
||||
chat_completion,
|
||||
analytics,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
@@ -122,8 +125,24 @@ fn setup_search_tool(
|
||||
system_role: SystemRole,
|
||||
) -> Result<FunctionSupport, ResponseError> {
|
||||
let tools = chat_completion.tools.get_or_insert_default();
|
||||
if tools.iter().any(|t| t.function.name == MEILI_SEARCH_IN_INDEX_FUNCTION_NAME) {
|
||||
panic!("{MEILI_SEARCH_IN_INDEX_FUNCTION_NAME} function already set");
|
||||
for tool in &tools[..] {
|
||||
match tool.function.name.as_str() {
|
||||
MEILI_SEARCH_IN_INDEX_FUNCTION_NAME => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("{MEILI_SEARCH_IN_INDEX_FUNCTION_NAME} function is already defined."),
|
||||
Code::BadRequest,
|
||||
));
|
||||
}
|
||||
MEILI_SEARCH_PROGRESS_NAME
|
||||
| MEILI_SEARCH_SOURCES_NAME
|
||||
| MEILI_APPEND_CONVERSATION_MESSAGE_NAME => (),
|
||||
external_function_name => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("{external_function_name}: External functions are not supported yet."),
|
||||
Code::UnimplementedExternalFunctionCalling,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove internal tools used for front-end notifications as they should be hidden from the LLM.
|
||||
@@ -229,9 +248,6 @@ async fn process_search_request(
|
||||
index_uid: String,
|
||||
q: Option<String>,
|
||||
) -> Result<(Index, Vec<Document>, String), ResponseError> {
|
||||
// TBD
|
||||
// let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.static_read_txn()?;
|
||||
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
|
||||
@@ -274,7 +290,7 @@ async fn process_search_request(
|
||||
let output = output?;
|
||||
let mut documents = Vec::new();
|
||||
if let Ok((ref rtxn, ref search_result)) = output {
|
||||
// aggregate.succeed(search_result);
|
||||
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
|
||||
if search_result.degraded {
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
@@ -290,7 +306,6 @@ async fn process_search_request(
|
||||
documents.push(document);
|
||||
}
|
||||
}
|
||||
// analytics.publish(aggregate, &req);
|
||||
|
||||
let (rtxn, search_result) = output?;
|
||||
let render_alloc = Bump::new();
|
||||
@@ -301,24 +316,45 @@ async fn process_search_request(
|
||||
Ok((index, documents, text))
|
||||
}
|
||||
|
||||
#[allow(unreachable_code, unused_variables)] // will be correctly implemented in the future
|
||||
async fn non_streamed_chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_COMPLETIONS }>, Data<IndexScheduler>>,
|
||||
auth_ctrl: web::Data<AuthController>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
workspace_uid: &str,
|
||||
req: HttpRequest,
|
||||
mut chat_completion: CreateChatCompletionRequest,
|
||||
chat_completion: CreateChatCompletionRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
|
||||
let filters = index_scheduler.filters();
|
||||
|
||||
let rtxn = index_scheduler.read_txn()?;
|
||||
let chat_settings = match index_scheduler.chat_settings(&rtxn, workspace_uid).unwrap() {
|
||||
// Create analytics aggregator
|
||||
let aggregate = ChatCompletionAggregator::from_request(
|
||||
&chat_completion.model,
|
||||
chat_completion.messages.len(),
|
||||
false, // non_streamed_chat is not streaming
|
||||
);
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
if let Some(n) = chat_completion.n.filter(|&n| n != 1) {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("You tried to specify n = {n} but only single choices are supported (n = 1)."),
|
||||
Code::UnimplementedMultiChoiceChatCompletions,
|
||||
));
|
||||
}
|
||||
|
||||
return Err(ResponseError::from_msg(
|
||||
"Non-streamed chat completions is not implemented".to_string(),
|
||||
Code::UnimplementedNonStreamingChatCompletions,
|
||||
));
|
||||
|
||||
let filters = index_scheduler.filters();
|
||||
let chat_settings = match index_scheduler.chat_settings(workspace_uid).unwrap() {
|
||||
Some(settings) => settings,
|
||||
None => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("Chat `{workspace_uid}` not found"),
|
||||
Code::ChatWorkspaceNotFound,
|
||||
Code::ChatNotFound,
|
||||
))
|
||||
}
|
||||
};
|
||||
@@ -373,12 +409,11 @@ async fn non_streamed_chat(
|
||||
};
|
||||
|
||||
// TODO report documents sources later
|
||||
let text = match result {
|
||||
let answer = match result {
|
||||
Ok((_, _documents, text)) => text,
|
||||
Err(err) => err,
|
||||
};
|
||||
|
||||
let answer = format!("{}\n\n{text}", chat_settings.prompts.pre_query);
|
||||
chat_completion.messages.push(ChatCompletionRequestMessage::Tool(
|
||||
ChatCompletionRequestToolMessage {
|
||||
tool_call_id: call.id.clone(),
|
||||
@@ -397,6 +432,11 @@ async fn non_streamed_chat(
|
||||
}
|
||||
}
|
||||
|
||||
// Record success in analytics
|
||||
let mut aggregate = aggregate;
|
||||
aggregate.succeed(start_time.elapsed());
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
||||
|
||||
@@ -407,21 +447,35 @@ async fn streamed_chat(
|
||||
workspace_uid: &str,
|
||||
req: HttpRequest,
|
||||
mut chat_completion: CreateChatCompletionRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<impl Responder, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
|
||||
let filters = index_scheduler.filters();
|
||||
|
||||
let rtxn = index_scheduler.read_txn()?;
|
||||
let chat_settings = match index_scheduler.chat_settings(&rtxn, workspace_uid)? {
|
||||
if let Some(n) = chat_completion.n.filter(|&n| n != 1) {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("You tried to specify n = {n} but only single choices are supported (n = 1)."),
|
||||
Code::UnimplementedMultiChoiceChatCompletions,
|
||||
));
|
||||
}
|
||||
|
||||
let chat_settings = match index_scheduler.chat_settings(workspace_uid)? {
|
||||
Some(settings) => settings,
|
||||
None => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("Chat `{workspace_uid}` not found"),
|
||||
Code::ChatWorkspaceNotFound,
|
||||
Code::ChatNotFound,
|
||||
))
|
||||
}
|
||||
};
|
||||
drop(rtxn);
|
||||
|
||||
// Create analytics aggregator
|
||||
let mut aggregate = ChatCompletionAggregator::from_request(
|
||||
&chat_completion.model,
|
||||
chat_completion.messages.len(),
|
||||
true, // streamed_chat is always streaming
|
||||
);
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
let config = Config::new(&chat_settings);
|
||||
let auth_token = extract_token_from_request(&req)?.unwrap().to_string();
|
||||
@@ -438,6 +492,7 @@ async fn streamed_chat(
|
||||
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(10);
|
||||
let tx = SseEventSender::new(tx);
|
||||
let workspace_uid = workspace_uid.to_string();
|
||||
let _join_handle = Handle::current().spawn(async move {
|
||||
let client = Client::with_config(config.clone());
|
||||
let mut global_tool_calls = HashMap::<u32, Call>::new();
|
||||
@@ -447,10 +502,11 @@ async fn streamed_chat(
|
||||
let output = run_conversation(
|
||||
&index_scheduler,
|
||||
&auth_ctrl,
|
||||
&workspace_uid,
|
||||
&search_queue,
|
||||
&auth_token,
|
||||
&client,
|
||||
&chat_settings,
|
||||
chat_settings.source,
|
||||
&mut chat_completion,
|
||||
&tx,
|
||||
&mut global_tool_calls,
|
||||
@@ -468,6 +524,10 @@ async fn streamed_chat(
|
||||
let _ = tx.stop().await;
|
||||
});
|
||||
|
||||
// Record success in analytics after the stream is set up
|
||||
aggregate.succeed(start_time.elapsed());
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10)))
|
||||
}
|
||||
|
||||
@@ -480,22 +540,44 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
auth_ctrl: &web::Data<AuthController>,
|
||||
workspace_uid: &str,
|
||||
search_queue: &web::Data<SearchQueue>,
|
||||
auth_token: &str,
|
||||
client: &Client<C>,
|
||||
chat_settings: &DbChatSettings,
|
||||
source: DbChatCompletionSource,
|
||||
chat_completion: &mut CreateChatCompletionRequest,
|
||||
tx: &SseEventSender,
|
||||
global_tool_calls: &mut HashMap<u32, Call>,
|
||||
function_support: FunctionSupport,
|
||||
) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
|
||||
use DbChatCompletionSource::*;
|
||||
|
||||
let mut finish_reason = None;
|
||||
// safety: The unwrap can only happen if the stream is not correctly configured.
|
||||
chat_completion.stream_options = match source {
|
||||
OpenAi | AzureOpenAi => Some(ChatCompletionStreamOptions { include_usage: true }),
|
||||
Mistral | VLlm => None,
|
||||
};
|
||||
|
||||
// safety: unwrap: can only happens if `stream` was set to `false`
|
||||
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
|
||||
while let Some(result) = response.next().await {
|
||||
match result {
|
||||
Ok(resp) => {
|
||||
let choice = &resp.choices[0];
|
||||
if let Some(usage) = resp.usage.as_ref() {
|
||||
MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.prompt_tokens as u64);
|
||||
MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.completion_tokens as u64);
|
||||
MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
|
||||
.with_label_values(&[workspace_uid, &chat_completion.model])
|
||||
.inc_by(usage.total_tokens as u64);
|
||||
}
|
||||
let choice = match resp.choices.first() {
|
||||
Some(choice) => choice,
|
||||
None => break,
|
||||
};
|
||||
finish_reason = choice.finish_reason;
|
||||
|
||||
let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
|
||||
@@ -530,15 +612,11 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
Call::External
|
||||
}
|
||||
});
|
||||
|
||||
if global_tool_calls.get(index).is_some_and(Call::is_external) {
|
||||
todo!("Support forwarding external tool calls");
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
if !global_tool_calls.is_empty() {
|
||||
let (meili_calls, other_calls): (Vec<_>, Vec<_>) =
|
||||
let (meili_calls, _other_calls): (Vec<_>, Vec<_>) =
|
||||
mem::take(global_tool_calls)
|
||||
.into_values()
|
||||
.flat_map(|call| match call {
|
||||
@@ -563,17 +641,11 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
.into(),
|
||||
);
|
||||
|
||||
assert!(
|
||||
other_calls.is_empty(),
|
||||
"We do not support external tool forwarding for now"
|
||||
);
|
||||
|
||||
handle_meili_tools(
|
||||
index_scheduler,
|
||||
auth_ctrl,
|
||||
search_queue,
|
||||
auth_token,
|
||||
chat_settings,
|
||||
tx,
|
||||
meili_calls,
|
||||
chat_completion,
|
||||
@@ -588,7 +660,13 @@ async fn run_conversation<C: async_openai::config::Config>(
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
let error = StreamErrorEvent::from_openai_error(error).await.unwrap();
|
||||
let result = match source {
|
||||
DbChatCompletionSource::Mistral => {
|
||||
StreamErrorEvent::from_openai_error::<MistralError>(error).await
|
||||
}
|
||||
_ => StreamErrorEvent::from_openai_error::<OpenAiOutsideError>(error).await,
|
||||
};
|
||||
let error = result.unwrap_or_else(StreamErrorEvent::from_reqwest_error);
|
||||
tx.send_error(&error).await?;
|
||||
return Ok(ControlFlow::Break(None));
|
||||
}
|
||||
@@ -611,7 +689,6 @@ async fn handle_meili_tools(
|
||||
auth_ctrl: &web::Data<AuthController>,
|
||||
search_queue: &web::Data<SearchQueue>,
|
||||
auth_token: &str,
|
||||
chat_settings: &DbChatSettings,
|
||||
tx: &SseEventSender,
|
||||
meili_calls: Vec<ChatCompletionMessageToolCall>,
|
||||
chat_completion: &mut CreateChatCompletionRequest,
|
||||
@@ -639,8 +716,10 @@ async fn handle_meili_tools(
|
||||
.await?;
|
||||
}
|
||||
|
||||
let mut error = None;
|
||||
|
||||
let result = match serde_json::from_str(&call.function.arguments) {
|
||||
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
|
||||
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
|
||||
index_scheduler,
|
||||
auth_ctrl.clone(),
|
||||
search_queue,
|
||||
@@ -649,22 +728,27 @@ async fn handle_meili_tools(
|
||||
q,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| e.to_string()),
|
||||
{
|
||||
Ok(output) => Ok(output),
|
||||
Err(err) => {
|
||||
let error_text = format!("the search tool call failed with {err}");
|
||||
error = Some(err);
|
||||
Err(error_text)
|
||||
}
|
||||
},
|
||||
Err(err) => Err(err.to_string()),
|
||||
};
|
||||
|
||||
let text = match result {
|
||||
let answer = match result {
|
||||
Ok((_index, documents, text)) => {
|
||||
if report_sources {
|
||||
tx.report_sources(resp.clone(), &call.id, &documents).await?;
|
||||
}
|
||||
|
||||
text
|
||||
}
|
||||
Err(err) => err,
|
||||
};
|
||||
|
||||
let answer = format!("{}\n\n{text}", chat_settings.prompts.pre_query);
|
||||
let tool = ChatCompletionRequestMessage::Tool(ChatCompletionRequestToolMessage {
|
||||
tool_call_id: call.id.clone(),
|
||||
content: ChatCompletionRequestToolMessageContent::Text(answer),
|
||||
@@ -675,6 +759,10 @@ async fn handle_meili_tools(
|
||||
}
|
||||
|
||||
chat_completion.messages.push(tool);
|
||||
|
||||
if let Some(error) = error {
|
||||
tx.send_error(&StreamErrorEvent::from_response_error(error)).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -696,16 +784,13 @@ impl Call {
|
||||
matches!(self, Call::Internal { .. })
|
||||
}
|
||||
|
||||
fn is_external(&self) -> bool {
|
||||
matches!(self, Call::External { .. })
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
///
|
||||
/// - if called on external calls
|
||||
fn append(&mut self, more: &str) {
|
||||
match self {
|
||||
Call::Internal { arguments, .. } => arguments.push_str(more),
|
||||
Call::External { .. } => {
|
||||
panic!("Cannot append argument chunks to an external function")
|
||||
}
|
||||
Call::External => panic!("Cannot append argument chunks to an external function"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ impl Config {
|
||||
pub fn new(chat_settings: &DbChatSettings) -> Self {
|
||||
use meilisearch_types::features::ChatCompletionSource::*;
|
||||
match chat_settings.source {
|
||||
OpenAi | Mistral | Gemini | VLlm => {
|
||||
OpenAi | Mistral | VLlm => {
|
||||
let mut config = OpenAIConfig::default();
|
||||
if let Some(org_id) = chat_settings.org_id.as_ref() {
|
||||
config = config.with_org_id(org_id);
|
||||
@@ -24,8 +24,9 @@ impl Config {
|
||||
if let Some(api_key) = chat_settings.api_key.as_ref() {
|
||||
config = config.with_api_key(api_key);
|
||||
}
|
||||
if let Some(base_api) = chat_settings.base_api.as_ref() {
|
||||
config = config.with_api_base(base_api);
|
||||
let base_url = chat_settings.base_url.as_deref();
|
||||
if let Some(base_url) = chat_settings.source.base_url().or(base_url) {
|
||||
config = config.with_api_base(base_url);
|
||||
}
|
||||
Self::OpenAiCompatible(config)
|
||||
}
|
||||
@@ -40,8 +41,8 @@ impl Config {
|
||||
if let Some(api_key) = chat_settings.api_key.as_ref() {
|
||||
config = config.with_api_key(api_key);
|
||||
}
|
||||
if let Some(base_api) = chat_settings.base_api.as_ref() {
|
||||
config = config.with_api_base(base_api);
|
||||
if let Some(base_url) = chat_settings.base_url.as_ref() {
|
||||
config = config.with_api_base(base_url);
|
||||
}
|
||||
Self::AzureOpenAiCompatible(config)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,42 @@
|
||||
use async_openai::error::{ApiError, OpenAIError};
|
||||
use async_openai::reqwest_eventsource::Error as EventSourceError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// The error type which is always `error`.
|
||||
const ERROR_TYPE: &str = "error";
|
||||
|
||||
/// The error struct returned by the Mistral API.
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "object": "error",
|
||||
/// "message": "Service tier capacity exceeded for this model.",
|
||||
/// "type": "invalid_request_error",
|
||||
/// "param": null,
|
||||
/// "code": null
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct MistralError {
|
||||
message: String,
|
||||
r#type: String,
|
||||
param: Option<String>,
|
||||
code: Option<String>,
|
||||
}
|
||||
|
||||
impl From<MistralError> for StreamErrorEvent {
|
||||
fn from(error: MistralError) -> Self {
|
||||
let MistralError { message, r#type, param, code } = error;
|
||||
StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: ERROR_TYPE.to_owned(),
|
||||
error: StreamError { r#type, code, message, param, event_id: None },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct OpenAiOutsideError {
|
||||
/// Emitted when an error occurs.
|
||||
@@ -22,6 +56,17 @@ pub struct OpenAiInnerError {
|
||||
r#type: String,
|
||||
}
|
||||
|
||||
impl From<OpenAiOutsideError> for StreamErrorEvent {
|
||||
fn from(error: OpenAiOutsideError) -> Self {
|
||||
let OpenAiOutsideError { error: OpenAiInnerError { code, message, param, r#type } } = error;
|
||||
StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError { r#type, code, message, param, event_id: None },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that occurs during the streaming process.
|
||||
///
|
||||
/// It directly comes from the OpenAI API and you can
|
||||
@@ -53,12 +98,15 @@ pub struct StreamError {
|
||||
}
|
||||
|
||||
impl StreamErrorEvent {
|
||||
pub async fn from_openai_error(error: OpenAIError) -> Result<Self, reqwest::Error> {
|
||||
let error_type = "error".to_string();
|
||||
pub async fn from_openai_error<E>(error: OpenAIError) -> Result<Self, reqwest::Error>
|
||||
where
|
||||
E: serde::de::DeserializeOwned,
|
||||
Self: From<E>,
|
||||
{
|
||||
match error {
|
||||
OpenAIError::Reqwest(e) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "internal_reqwest_error".to_string(),
|
||||
code: Some("internal".to_string()),
|
||||
@@ -69,7 +117,7 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
OpenAIError::ApiError(ApiError { message, r#type, param, code }) => {
|
||||
Ok(StreamErrorEvent {
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
error: StreamError {
|
||||
r#type: r#type.unwrap_or_else(|| "unknown".to_string()),
|
||||
@@ -82,7 +130,7 @@ impl StreamErrorEvent {
|
||||
}
|
||||
OpenAIError::JSONDeserialize(error) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "json_deserialize_error".to_string(),
|
||||
code: Some("internal".to_string()),
|
||||
@@ -94,30 +142,16 @@ impl StreamErrorEvent {
|
||||
OpenAIError::FileSaveError(_) | OpenAIError::FileReadError(_) => unreachable!(),
|
||||
OpenAIError::StreamError(error) => match error {
|
||||
EventSourceError::InvalidStatusCode(_status_code, response) => {
|
||||
let OpenAiOutsideError {
|
||||
error: OpenAiInnerError { code, message, param, r#type },
|
||||
} = response.json().await?;
|
||||
|
||||
Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
error: StreamError { r#type, code, message, param, event_id: None },
|
||||
})
|
||||
let error = response.json::<E>().await?;
|
||||
Ok(StreamErrorEvent::from(error))
|
||||
}
|
||||
EventSourceError::InvalidContentType(_header_value, response) => {
|
||||
let OpenAiOutsideError {
|
||||
error: OpenAiInnerError { code, message, param, r#type },
|
||||
} = response.json().await?;
|
||||
|
||||
Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
error: StreamError { r#type, code, message, param, event_id: None },
|
||||
})
|
||||
let error = response.json::<E>().await?;
|
||||
Ok(StreamErrorEvent::from(error))
|
||||
}
|
||||
EventSourceError::Utf8(error) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "invalid_utf8_error".to_string(),
|
||||
code: None,
|
||||
@@ -128,7 +162,7 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
EventSourceError::Parser(error) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "parser_error".to_string(),
|
||||
code: None,
|
||||
@@ -139,7 +173,7 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
EventSourceError::Transport(error) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "transport_error".to_string(),
|
||||
code: None,
|
||||
@@ -150,7 +184,7 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
EventSourceError::InvalidLastEventId(message) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "invalid_last_event_id".to_string(),
|
||||
code: None,
|
||||
@@ -161,7 +195,7 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
EventSourceError::StreamEnded => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "stream_ended".to_string(),
|
||||
code: None,
|
||||
@@ -173,7 +207,7 @@ impl StreamErrorEvent {
|
||||
},
|
||||
OpenAIError::InvalidArgument(message) => Ok(StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: error_type,
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "invalid_argument".to_string(),
|
||||
code: None,
|
||||
@@ -184,4 +218,33 @@ impl StreamErrorEvent {
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_response_error(error: ResponseError) -> Self {
|
||||
let ResponseError { code, message, .. } = error;
|
||||
StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "response_error".to_string(),
|
||||
code: Some(code.as_str().to_string()),
|
||||
message,
|
||||
param: None,
|
||||
event_id: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_reqwest_error(error: reqwest::Error) -> Self {
|
||||
StreamErrorEvent {
|
||||
event_id: Uuid::new_v4().to_string(),
|
||||
r#type: ERROR_TYPE.to_string(),
|
||||
error: StreamError {
|
||||
r#type: "reqwest_error".to_string(),
|
||||
code: None,
|
||||
message: error.to_string(),
|
||||
param: None,
|
||||
event_id: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,9 +6,11 @@ use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::DeserrQueryParamError;
|
||||
use meilisearch_types::error::deserr_codes::{InvalidIndexLimit, InvalidIndexOffset};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
use utoipa::{IntoParams, ToSchema};
|
||||
|
||||
@@ -17,6 +19,7 @@ use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::PAGINATION_DEFAULT_LIMIT;
|
||||
|
||||
mod chat_completion_analytics;
|
||||
pub mod chat_completions;
|
||||
mod config;
|
||||
mod errors;
|
||||
@@ -24,12 +27,21 @@ pub mod settings;
|
||||
mod utils;
|
||||
|
||||
/// The function name to report search progress.
|
||||
/// This function is used to report on what meilisearch is
|
||||
/// doing which must be used on the frontend to report progress.
|
||||
const MEILI_SEARCH_PROGRESS_NAME: &str = "_meiliSearchProgress";
|
||||
/// The function name to append a conversation message in the user conversation.
|
||||
/// This function is used to append a conversation message in the user conversation.
|
||||
/// This must be used on the frontend to keep context of what happened on the
|
||||
/// Meilisearch-side and keep good context for follow up questions.
|
||||
const MEILI_APPEND_CONVERSATION_MESSAGE_NAME: &str = "_meiliAppendConversationMessage";
|
||||
/// The function name to report sources to the frontend.
|
||||
/// This function is used to report sources to the frontend.
|
||||
/// The call id is associated to the one used by the search progress function.
|
||||
const MEILI_SEARCH_SOURCES_NAME: &str = "_meiliSearchSources";
|
||||
/// The *internal* function name to provide to the LLM to search in indexes.
|
||||
/// This function must not leak to the user as the LLM will call it and the
|
||||
/// main goal of Meilisearch is to provide an answer to these calls.
|
||||
const MEILI_SEARCH_IN_INDEX_FUNCTION_NAME: &str = "_meiliSearchInIndex";
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@@ -40,11 +52,44 @@ pub struct ChatsParam {
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(list_workspaces))).service(
|
||||
web::scope("/{workspace_uid}")
|
||||
.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(get_chat))
|
||||
.route(web::delete().to(delete_chat)),
|
||||
)
|
||||
.service(web::scope("/chat/completions").configure(chat_completions::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn get_chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHATS_GET }>, Data<IndexScheduler>>,
|
||||
workspace_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("displaying a chat")?;
|
||||
|
||||
let workspace_uid = IndexUid::try_from(workspace_uid.into_inner())?;
|
||||
if index_scheduler.chat_workspace_exists(&workspace_uid)? {
|
||||
Ok(HttpResponse::Ok().json(json!({ "uid": workspace_uid })))
|
||||
} else {
|
||||
Err(ResponseError::from_msg(format!("chat {workspace_uid} not found"), Code::ChatNotFound))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_chat(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::CHATS_DELETE }>, Data<IndexScheduler>>,
|
||||
workspace_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("deleting a chat")?;
|
||||
|
||||
let workspace_uid = workspace_uid.into_inner();
|
||||
if index_scheduler.delete_chat_settings(&workspace_uid)? {
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
} else {
|
||||
Err(ResponseError::from_msg(format!("chat {workspace_uid} not found"), Code::ChatNotFound))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug, Clone, Copy, IntoParams)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[into_params(rename_all = "camelCase", parameter_in = Query)]
|
||||
@@ -79,12 +124,8 @@ pub async fn list_workspaces(
|
||||
index_scheduler.features().check_chat_completions("listing the chats")?;
|
||||
|
||||
debug!(parameters = ?paginate, "List chat workspaces");
|
||||
let filters = index_scheduler.filters();
|
||||
let (total, workspaces) = index_scheduler.paginated_chat_workspace_uids(
|
||||
filters,
|
||||
*paginate.offset,
|
||||
*paginate.limit,
|
||||
)?;
|
||||
let (total, workspaces) =
|
||||
index_scheduler.paginated_chat_workspace_uids(*paginate.offset, *paginate.limit)?;
|
||||
let workspaces =
|
||||
workspaces.into_iter().map(|uid| ChatWorkspaceView { uid }).collect::<Vec<_>>();
|
||||
let ret = paginate.as_pagination().format_with(total, workspaces);
|
||||
|
||||
@@ -7,9 +7,9 @@ use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
|
||||
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_PRE_QUERY_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
|
||||
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
|
||||
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
|
||||
DEFAULT_CHAT_SYSTEM_PROMPT,
|
||||
};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
@@ -26,7 +26,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_settings)))
|
||||
.route(web::patch().to(SeqHandler(patch_settings)))
|
||||
.route(web::delete().to(SeqHandler(delete_settings))),
|
||||
.route(web::delete().to(SeqHandler(reset_settings))),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -41,14 +41,12 @@ async fn get_settings(
|
||||
|
||||
let ChatsParam { workspace_uid } = chats_param.into_inner();
|
||||
|
||||
// TODO do a spawn_blocking here ???
|
||||
let rtxn = index_scheduler.read_txn()?;
|
||||
let mut settings = match index_scheduler.chat_settings(&rtxn, &workspace_uid)? {
|
||||
let mut settings = match index_scheduler.chat_settings(&workspace_uid)? {
|
||||
Some(settings) => settings,
|
||||
None => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!("Chat `{workspace_uid}` not found"),
|
||||
Code::ChatWorkspaceNotFound,
|
||||
Code::ChatNotFound,
|
||||
))
|
||||
}
|
||||
};
|
||||
@@ -62,14 +60,12 @@ async fn patch_settings(
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
chats_param: web::Path<ChatsParam>,
|
||||
web::Json(new): web::Json<GlobalChatSettings>,
|
||||
web::Json(new): web::Json<ChatWorkspaceSettings>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_chat_completions("using the /chats/settings route")?;
|
||||
let ChatsParam { workspace_uid } = chats_param.into_inner();
|
||||
|
||||
// TODO do a spawn_blocking here
|
||||
let mut wtxn = index_scheduler.write_txn()?;
|
||||
let old_settings = index_scheduler.chat_settings(&wtxn, &workspace_uid)?.unwrap_or_default();
|
||||
let old_settings = index_scheduler.chat_settings(&workspace_uid)?.unwrap_or_default();
|
||||
|
||||
let prompts = match new.prompts {
|
||||
Setting::Set(new_prompts) => DbChatCompletionPrompts {
|
||||
@@ -93,17 +89,12 @@ async fn patch_settings(
|
||||
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
|
||||
Setting::NotSet => old_settings.prompts.search_index_uid_param,
|
||||
},
|
||||
pre_query: match new_prompts.pre_query {
|
||||
Setting::Set(new_description) => new_description,
|
||||
Setting::Reset => DEFAULT_CHAT_PRE_QUERY_PROMPT.to_string(),
|
||||
Setting::NotSet => old_settings.prompts.pre_query,
|
||||
},
|
||||
},
|
||||
Setting::Reset => DbChatCompletionPrompts::default(),
|
||||
Setting::NotSet => old_settings.prompts,
|
||||
};
|
||||
|
||||
let settings = ChatCompletionSettings {
|
||||
let mut settings = ChatCompletionSettings {
|
||||
source: match new.source {
|
||||
Setting::Set(new_source) => new_source.into(),
|
||||
Setting::Reset => DbChatCompletionSource::default(),
|
||||
@@ -129,10 +120,10 @@ async fn patch_settings(
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_settings.deployment_id,
|
||||
},
|
||||
base_api: match new.base_api {
|
||||
Setting::Set(new_base_api) => Some(new_base_api),
|
||||
base_url: match new.base_url {
|
||||
Setting::Set(new_base_url) => Some(new_base_url),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_settings.base_api,
|
||||
Setting::NotSet => old_settings.base_url,
|
||||
},
|
||||
api_key: match new.api_key {
|
||||
Setting::Set(new_api_key) => Some(new_api_key),
|
||||
@@ -151,15 +142,17 @@ async fn patch_settings(
|
||||
// &req,
|
||||
// );
|
||||
|
||||
index_scheduler.put_chat_settings(&mut wtxn, &workspace_uid, &settings)?;
|
||||
wtxn.commit()?;
|
||||
settings.validate()?;
|
||||
index_scheduler.put_chat_settings(&workspace_uid, &settings)?;
|
||||
|
||||
settings.hide_secrets();
|
||||
|
||||
Ok(HttpResponse::Ok().json(settings))
|
||||
}
|
||||
|
||||
async fn delete_settings(
|
||||
async fn reset_settings(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::CHATS_SETTINGS_DELETE }>,
|
||||
ActionPolicy<{ actions::CHATS_SETTINGS_UPDATE }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
chats_param: web::Path<ChatsParam>,
|
||||
@@ -167,16 +160,14 @@ async fn delete_settings(
|
||||
index_scheduler.features().check_chat_completions("using the /chats/settings route")?;
|
||||
|
||||
let ChatsParam { workspace_uid } = chats_param.into_inner();
|
||||
|
||||
// TODO do a spawn_blocking here
|
||||
let mut wtxn = index_scheduler.write_txn()?;
|
||||
if index_scheduler.delete_chat_settings(&mut wtxn, &workspace_uid)? {
|
||||
wtxn.commit()?;
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
if index_scheduler.chat_settings(&workspace_uid)?.is_some() {
|
||||
let settings = Default::default();
|
||||
index_scheduler.put_chat_settings(&workspace_uid, &settings)?;
|
||||
Ok(HttpResponse::Ok().json(settings))
|
||||
} else {
|
||||
Err(ResponseError::from_msg(
|
||||
format!("Chat `{workspace_uid}` not found"),
|
||||
Code::ChatWorkspaceNotFound,
|
||||
Code::ChatNotFound,
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -185,7 +176,7 @@ async fn delete_settings(
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct GlobalChatSettings {
|
||||
pub struct ChatWorkspaceSettings {
|
||||
#[serde(default)]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<ChatCompletionSource>)]
|
||||
@@ -209,7 +200,7 @@ pub struct GlobalChatSettings {
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionBaseApi>)]
|
||||
#[schema(value_type = Option<String>, example = json!("https://api.mistral.ai/v1"))]
|
||||
pub base_api: Setting<String>,
|
||||
pub base_url: Setting<String>,
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionApiKey>)]
|
||||
#[schema(value_type = Option<String>, example = json!("abcd1234..."))]
|
||||
@@ -226,12 +217,19 @@ pub struct GlobalChatSettings {
|
||||
pub enum ChatCompletionSource {
|
||||
#[default]
|
||||
OpenAi,
|
||||
Mistral,
|
||||
AzureOpenAi,
|
||||
VLlm,
|
||||
}
|
||||
|
||||
impl From<ChatCompletionSource> for DbChatCompletionSource {
|
||||
fn from(source: ChatCompletionSource) -> Self {
|
||||
use ChatCompletionSource::*;
|
||||
match source {
|
||||
ChatCompletionSource::OpenAi => DbChatCompletionSource::OpenAi,
|
||||
OpenAi => DbChatCompletionSource::OpenAi,
|
||||
Mistral => DbChatCompletionSource::Mistral,
|
||||
AzureOpenAi => DbChatCompletionSource::AzureOpenAi,
|
||||
VLlm => DbChatCompletionSource::VLlm,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -257,8 +255,4 @@ pub struct ChatPrompts {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
|
||||
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
|
||||
pub search_index_uid_param: Setting<String>,
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionPreQueryPrompt>)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub pre_query: Setting<String>,
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use async_openai::types::{
|
||||
FunctionCall, FunctionCallStream, Role,
|
||||
};
|
||||
use bumpalo::Bump;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::index::ChatConfig;
|
||||
use meilisearch_types::milli::prompt::{Prompt, PromptData};
|
||||
@@ -41,7 +41,7 @@ impl SseEventSender {
|
||||
function_name: String,
|
||||
function_arguments: String,
|
||||
) -> Result<(), SendError<Event>> {
|
||||
#[allow(deprecated)]
|
||||
#[allow(deprecated)] // function_call
|
||||
let message =
|
||||
ChatCompletionRequestMessage::Assistant(ChatCompletionRequestAssistantMessage {
|
||||
content: None,
|
||||
@@ -78,7 +78,7 @@ impl SseEventSender {
|
||||
|
||||
resp.choices[0] = ChatChoiceStream {
|
||||
index: 0,
|
||||
#[allow(deprecated)]
|
||||
#[allow(deprecated)] // function_call
|
||||
delta: ChatCompletionStreamResponseDelta {
|
||||
content: None,
|
||||
function_call: None,
|
||||
@@ -125,7 +125,7 @@ impl SseEventSender {
|
||||
|
||||
resp.choices[0] = ChatChoiceStream {
|
||||
index: 0,
|
||||
#[allow(deprecated)]
|
||||
#[allow(deprecated)] // function_call
|
||||
delta: ChatCompletionStreamResponseDelta {
|
||||
content: None,
|
||||
function_call: None,
|
||||
@@ -170,7 +170,7 @@ impl SseEventSender {
|
||||
|
||||
resp.choices[0] = ChatChoiceStream {
|
||||
index: 0,
|
||||
#[allow(deprecated)]
|
||||
#[allow(deprecated)] // function_call
|
||||
delta: ChatCompletionStreamResponseDelta {
|
||||
content: None,
|
||||
function_call: None,
|
||||
@@ -197,7 +197,10 @@ impl SseEventSender {
|
||||
}
|
||||
|
||||
pub async fn stop(self) -> Result<(), SendError<Event>> {
|
||||
self.0.send(Event::Data(sse::Data::new("[DONE]"))).await
|
||||
// It is the way OpenAI sends a correct end of stream
|
||||
// <https://platform.openai.com/docs/api-reference/assistants-streaming/events>
|
||||
const DONE_DATA: &str = "[DONE]";
|
||||
self.0.send(Event::Data(sse::Data::new(DONE_DATA))).await
|
||||
}
|
||||
|
||||
async fn send_json<S: Serialize>(&self, data: &S) -> Result<(), SendError<Event>> {
|
||||
@@ -232,10 +235,17 @@ pub fn format_documents<'doc>(
|
||||
for (docid, external_docid) in internal_docids.into_iter().zip(external_ids) {
|
||||
let document = match DocumentFromDb::new(docid, rtxn, index, &fid_map)? {
|
||||
Some(doc) => doc,
|
||||
None => continue,
|
||||
None => unreachable!("Document with internal ID {docid} not found"),
|
||||
};
|
||||
let text = match prompt.render_document(&external_docid, document, &gfid_map, doc_alloc) {
|
||||
Ok(text) => text,
|
||||
Err(err) => {
|
||||
return Err(ResponseError::from_msg(
|
||||
err.to_string(),
|
||||
Code::InvalidChatSettingDocumentTemplate,
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let text = prompt.render_document(&external_docid, document, &gfid_map, doc_alloc).unwrap();
|
||||
renders.push(text);
|
||||
}
|
||||
|
||||
|
||||
183
crates/meilisearch/src/routes/export.rs
Normal file
183
crates/meilisearch/src/routes/export.rs
Normal file
@@ -0,0 +1,183 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::convert::Infallible;
|
||||
use std::str::FromStr as _;
|
||||
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use byte_unit::Byte;
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::export_analytics::ExportAnalytics;
|
||||
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
|
||||
use crate::Opt;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(export),
|
||||
tags((
|
||||
name = "Export",
|
||||
description = "The `/export` route allows you to trigger an export process to a remote Meilisearch instance.",
|
||||
external_docs(url = "https://www.meilisearch.com/docs/reference/api/export"),
|
||||
)),
|
||||
)]
|
||||
pub struct ExportApi;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(export)));
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "",
|
||||
tag = "Export",
|
||||
security(("Bearer" = ["export", "*"])),
|
||||
responses(
|
||||
(status = 202, description = "Export successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
|
||||
{
|
||||
"taskUid": 1,
|
||||
"status": "enqueued",
|
||||
"type": "export",
|
||||
"enqueuedAt": "2021-08-11T09:25:53.000000Z"
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
async fn export(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::EXPORT }>, Data<IndexScheduler>>,
|
||||
export: AwebJson<Export, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let export = export.into_inner();
|
||||
debug!(returns = ?export, "Trigger export");
|
||||
|
||||
let analytics_aggregate = ExportAnalytics::from_export(&export);
|
||||
|
||||
let Export { url, api_key, payload_size, indexes } = export;
|
||||
|
||||
let indexes = match indexes {
|
||||
Some(indexes) => indexes
|
||||
.into_iter()
|
||||
.map(|(pattern, ExportIndexSettings { filter, override_settings })| {
|
||||
(pattern, DbExportIndexSettings { filter, override_settings })
|
||||
})
|
||||
.collect(),
|
||||
None => BTreeMap::from([(
|
||||
IndexUidPattern::new_unchecked("*"),
|
||||
DbExportIndexSettings::default(),
|
||||
)]),
|
||||
};
|
||||
|
||||
let task = KindWithContent::Export {
|
||||
url,
|
||||
api_key,
|
||||
payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
|
||||
indexes,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
analytics.publish(analytics_aggregate, &req);
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Export {
|
||||
#[schema(value_type = Option<String>, example = json!("https://ms-1234.heaven.meilisearch.com"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportUrl>)]
|
||||
pub url: String,
|
||||
#[schema(value_type = Option<String>, example = json!("1234abcd"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportApiKey>)]
|
||||
pub api_key: Option<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("24MiB"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportPayloadSize>)]
|
||||
pub payload_size: Option<ByteWithDeserr>,
|
||||
#[schema(value_type = Option<BTreeMap<String, ExportIndexSettings>>, example = json!({ "*": { "filter": null } }))]
|
||||
#[deserr(default)]
|
||||
#[serde(default)]
|
||||
pub indexes: Option<BTreeMap<IndexUidPattern, ExportIndexSettings>>,
|
||||
}
|
||||
|
||||
/// A wrapper around the `Byte` type that implements `Deserr`.
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct ByteWithDeserr(pub Byte);
|
||||
|
||||
impl<E> deserr::Deserr<E> for ByteWithDeserr
|
||||
where
|
||||
E: deserr::DeserializeError,
|
||||
{
|
||||
fn deserialize_from_value<V: deserr::IntoValue>(
|
||||
value: deserr::Value<V>,
|
||||
location: deserr::ValuePointerRef,
|
||||
) -> Result<Self, E> {
|
||||
use deserr::{ErrorKind, Value, ValueKind};
|
||||
match value {
|
||||
Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
|
||||
Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
|
||||
deserr::take_cf_content(E::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected { msg: e.to_string() },
|
||||
location,
|
||||
))
|
||||
}),
|
||||
actual => Err(deserr::take_cf_content(E::error(
|
||||
None,
|
||||
ErrorKind::IncorrectValueKind {
|
||||
actual,
|
||||
accepted: &[ValueKind::Integer, ValueKind::String],
|
||||
},
|
||||
location,
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct ExportIndexSettings {
|
||||
#[schema(value_type = Option<String>, example = json!("genres = action"))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[schema(value_type = Option<bool>, example = json!(true))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidExportIndexOverrideSettings>)]
|
||||
pub override_settings: bool,
|
||||
}
|
||||
93
crates/meilisearch/src/routes/export_analytics.rs
Normal file
93
crates/meilisearch/src/routes/export_analytics.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
use crate::analytics::Aggregate;
|
||||
use crate::routes::export::Export;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ExportAnalytics {
|
||||
total_received: usize,
|
||||
has_api_key: bool,
|
||||
sum_index_patterns: usize,
|
||||
sum_patterns_with_filter: usize,
|
||||
sum_patterns_with_override_settings: usize,
|
||||
payload_sizes: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ExportAnalytics {
|
||||
pub fn from_export(export: &Export) -> Self {
|
||||
let Export { url: _, api_key, payload_size, indexes } = export;
|
||||
|
||||
let has_api_key = api_key.is_some();
|
||||
let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
|
||||
let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
|
||||
indexes.values().filter(|settings| settings.filter.is_some()).count()
|
||||
});
|
||||
let patterns_with_override_settings_count = indexes.as_ref().map_or(0, |indexes| {
|
||||
indexes.values().filter(|settings| settings.override_settings).count()
|
||||
});
|
||||
let payload_sizes =
|
||||
if let Some(crate::routes::export::ByteWithDeserr(byte_size)) = payload_size {
|
||||
vec![byte_size.as_u64()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Self {
|
||||
total_received: 1,
|
||||
has_api_key,
|
||||
sum_index_patterns: index_patterns_count,
|
||||
sum_patterns_with_filter: patterns_with_filter_count,
|
||||
sum_patterns_with_override_settings: patterns_with_override_settings_count,
|
||||
payload_sizes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Aggregate for ExportAnalytics {
|
||||
fn event_name(&self) -> &'static str {
|
||||
"Export Triggered"
|
||||
}
|
||||
|
||||
fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
|
||||
self.total_received += other.total_received;
|
||||
self.has_api_key |= other.has_api_key;
|
||||
self.sum_index_patterns += other.sum_index_patterns;
|
||||
self.sum_patterns_with_filter += other.sum_patterns_with_filter;
|
||||
self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
|
||||
self.payload_sizes.extend(other.payload_sizes);
|
||||
self
|
||||
}
|
||||
|
||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||
let avg_payload_size = if self.payload_sizes.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
|
||||
};
|
||||
|
||||
let avg_index_patterns = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_index_patterns as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
let avg_patterns_with_filter = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_patterns_with_filter as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
let avg_patterns_with_override_settings = if self.total_received == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self.sum_patterns_with_override_settings as f64 / self.total_received as f64)
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"total_received": self.total_received,
|
||||
"has_api_key": self.has_api_key,
|
||||
"avg_index_patterns": avg_index_patterns,
|
||||
"avg_patterns_with_filter": avg_patterns_with_filter,
|
||||
"avg_patterns_with_override_settings": avg_patterns_with_override_settings,
|
||||
"avg_payload_size": avg_payload_size,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -54,6 +54,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -100,6 +101,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub composite_embedders: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub chat_completions: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub multimodal: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@@ -113,6 +116,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@@ -124,6 +128,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
get_task_documents_route: Some(get_task_documents_route),
|
||||
composite_embedders: Some(composite_embedders),
|
||||
chat_completions: Some(chat_completions),
|
||||
multimodal: Some(multimodal),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -138,6 +143,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: bool,
|
||||
composite_embedders: bool,
|
||||
chat_completions: bool,
|
||||
multimodal: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@@ -155,6 +161,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: new.get_task_documents_route,
|
||||
composite_embedders: new.composite_embedders,
|
||||
chat_completions: new.chat_completions,
|
||||
multimodal: new.multimodal,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -181,6 +188,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -223,6 +231,7 @@ async fn patch_features(
|
||||
.composite_embedders
|
||||
.unwrap_or(old_features.composite_embedders),
|
||||
chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
|
||||
multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@@ -237,6 +246,7 @@ async fn patch_features(
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@@ -249,6 +259,7 @@ async fn patch_features(
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
@@ -1452,7 +1452,6 @@ fn some_documents<'a, 't: 'a>(
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||
@@ -1468,15 +1467,9 @@ fn some_documents<'a, 't: 'a>(
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(vector.into()),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
|
||||
@@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
|
||||
pub media: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
@@ -94,6 +96,7 @@ impl FacetSearchAggregator {
|
||||
facet_name,
|
||||
vector,
|
||||
q,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
@@ -108,6 +111,7 @@ impl FacetSearchAggregator {
|
||||
facet_names: Some(facet_name.clone()).into_iter().collect(),
|
||||
additional_search_parameters_provided: q.is_some()
|
||||
|| vector.is_some()
|
||||
|| media.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some()
|
||||
@@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
facet_name: _,
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
@@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
|
||||
SearchQuery {
|
||||
q,
|
||||
media,
|
||||
offset: DEFAULT_SEARCH_OFFSET(),
|
||||
limit: DEFAULT_SEARCH_LIMIT(),
|
||||
page,
|
||||
|
||||
@@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
media: None,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
offset: other.offset.0,
|
||||
limit: other.limit.0,
|
||||
@@ -481,28 +483,30 @@ pub fn search_kind(
|
||||
index_uid: String,
|
||||
index: &milli::Index,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
let is_placeholder_query =
|
||||
if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
|
||||
let non_placeholder_query = !is_placeholder_query;
|
||||
let is_media = query.media.is_some();
|
||||
// handle with care, the order of cases matters, the semantics is subtle
|
||||
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
||||
// empty query, no vector => placeholder search
|
||||
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
||||
// no query, no vector => placeholder search
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
|
||||
// media + vector => error
|
||||
(true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
|
||||
// media + !hybrid => error
|
||||
(true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// vector + !hybrid => error
|
||||
(_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// hybrid S0 => keyword
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
Ok(SearchKind::KeywordOnly)
|
||||
}
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
|
||||
// !q + !vector => placeholder search
|
||||
(false, false, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid S100 => semantic
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
// q + hybrid => hybrid
|
||||
(_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
index,
|
||||
@@ -510,7 +514,11 @@ pub fn search_kind(
|
||||
**semantic_ratio,
|
||||
v.map(|v| v.len()),
|
||||
),
|
||||
|
||||
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// !q + hybrid => semantic
|
||||
(_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// q => keyword
|
||||
(false, true, None, None) => Ok(SearchKind::KeywordOnly),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,6 +61,8 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
||||
semantic_ratio: bool,
|
||||
hybrid: bool,
|
||||
retrieve_vectors: bool,
|
||||
// Number of requests containing `media`
|
||||
total_media: usize,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
@@ -101,6 +103,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
let SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
offset,
|
||||
limit,
|
||||
page,
|
||||
@@ -175,6 +178,11 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
if let Some(ref vector) = vector {
|
||||
ret.max_vector_size = vector.len();
|
||||
}
|
||||
|
||||
if media.is_some() {
|
||||
ret.total_media = 1;
|
||||
}
|
||||
|
||||
ret.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
if query.is_finite_pagination() {
|
||||
@@ -277,6 +285,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
@@ -327,6 +336,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
self.retrieve_vectors |= retrieve_vectors;
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.total_media += total_media;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@@ -403,6 +413,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
@@ -450,6 +461,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"total_media": total_media,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user