mirror of https://github.com/meilisearch/meilisearch.git
synced 2025-07-21 22:00:59 +00:00

Compare commits: clone-inde ... embedding- (311 commits)
Commits (SHA1 only; the Author and Date columns are empty in this capture):

6dc241f9de, 01d1ef65c4, 3246667590, 109395c199, a0b71a8785, 00a5c86f13, 366c37a686, afc164a271,
0312fb22b8, f1d92bfead, a005a062da, fd8b2451d7, 058f9ffda5, 5d363205a5, a683faa882, 8887cbdcd5,
634865ff53, 36fccf8525, d6bd60d569, 48ad959fc1, 1bc30cb4c8, 77138a42d6, 0791506124, 2a015ac3b8,
6f248b78a9, d694e312ff, d76dcc8998, e654f66223, 34f2ab7093, 1a9dbd364e, 662c5d9871, 5cd61b50f9,
9a9be76757, cfa6ba6c3b, f4f333dbf6, 1ade76ba10, ae26658913, aa09edb3fb, 3f42f1a036, 9bdfdd395b,
78d0625a91, 3f655ea20e, 50bc1d55f3, 0a4f2ef891, faa1f7c5b7, 3cc5d86598, 1ae47bec77, 2f1be0ff86,
9cee432255, ff8d48d2f1, a56c036994, 511c48f520, 4623691d1f, 3261aadcf2, 073e9f2967, 5f8f48ec95,
ed2fe365a0, f7c8a77f89, a8030850ee, 132065afda, 51c298662b, 70a860a0f0, a3254d7d7d, 73c9c1ebdc,
4c7a6e5c1b, ef4c87accf, ced7ea4a5c, fa3990daf9, c5993196b3, 16234e1313, be9f4f96df, b274106ad3,
48527761e7, 6792d048b8, 07bfed99e6, 8dfded2993, 3714f16696, d0cd3cacec, fef089c7b6, d47e1e15de,
caccb51814, a76a3e8f11, 32dede35c7, 6397ef12a0, cf9b311f71, 7423243be0, b5e41f0e46, 5690700601,
2faad504c6, 2bcd69750f, 9f0d33ec99, de24e75be8, a3af9fe057, 90683d0e4e, 5c79273748, 90e6b6416f,
2b75072b09, 6e6fd077d4, b45eea0d3e, a051ab3d9a, 0b89ef1fd7, 65ba7b47af, 8af76a65bf, 6b94033c97,
dfe0c8664e, 0ca652de28, 87f105747f, 735634e998, 3740755d9c, bbcabc47bd, a06cb1bfd6, 549dc985b8,
428463e45c, 7113fcf63a, aa6855cd4f, 895db76a51, a88146d59e, 91e77abf4f, f60814b319, 5a675bcb82,
82a796aea7, f6287602e9, ede456c5b0, 3f5b5df139, d72e5f5f69, aa366d593d, 205430854d, be64006211,
eda309d562, 119d618a76, 2b2e6c0b3a, e6329e77e1, b086c51a23, 9ce5598fef, e30c24b5bf, c1a132fa06,
e54fc59248, 11e7c0d75f, c593fbe648, 2b3327ea74, d14184f4da, 46bceb91f1, cab5e35ff7, f8232976ed,
22d363c05a, 41620d5325, f3d5c74c02, d48baece51, c45ede44a8, 4235a82dcf, e7b9b8f002, 5716ab70f3,
422a786ffd, 836ae19bec, 0b5bc41b79, b45059e8f2, c16c60b599, 0114796d2a, 17a94c40dc, 76ca44b214,
d2e4d6dd8a, 879cf85037, c2d5b20a42, 600178c5ab, b93ca3945e, 8fef48f8ca, dedae94102, 7ae9a4afee,
d2776efb11, 9211e94c4f, b7bebe9bbb, 37a692f942, 25c19a306b, c078efd730, 9dac91efe0, 074d509d92,
d439a3cb9d, e92b6beb20, 27cc357362, 73dfeefc7c, d85480de89, 9f55708d84, 280c3907be, 8419fd9b3b,
283944ea89, 8aacd6374a, 8326f34ad1, 259fc067d3, e8b2bb3ea6, 7dfb2071b5, 9cfbef478e, efd5fd96cc,
f4a908669c, eb2c2815b6, 0ef52941c7, 0d85f8fcee, f4bb6cbca8, ad03c86c44, 85037352b9, 29e9c74a49,
1b54c866e1, e414284335, 7a204609fe, f6803dd7d1, f86f4f619f, e35d58b531, 63827bbee0, 6b2b8ed676,
6db5939f84, d35b2d8d33, 0687cf058a, 340d9e6edc, 7219299436, 657bbf5d1e, 28adbc0d18, e3fba62e13,
fb9170b8e3, c15763f910, 7fa1c41190, 77802dabf6, a685eeafeb, f16e6f7c37, 900be0ccad, 51a087b764,
31142b3663, e60b855a54, 510a4b91be, e704f4d1ec, 82fe80b360, 0f1dd3614c, 3aa6c3c750, b956918c11,
e3003c1609, bf13268649, 0bb7866f1e, e6e9a033aa, 63031219c5, 44d6430bae, 4d26e9c6f2, 2ff382c023,
0f6dd133b2, 29f6eeff8f, ef007d547d, 3fc16c627d, 9422b6d654, ddba52414a, 4534dc2cab, b05cb80803,
6e0526090a, a743da3061, c6216517c7, 2d4f7c635e, ee812b31c4, 3329248a84, bc08cd0deb, 3e2f468213,
7c448bcc00, acb7c0a449, e8795d2608, e023ee4b6b, e74c3b692a, 1d3b18f774, 00bc86e74b, adc9976615,
2090e9ea31, 1c8f1c18f4, ae8c1461e1, 5f62274f21, c4a96b40eb, 5f50fc9464, 89498a2bea, 211c1b753f,
d08e89ea3d, 695877043a, bc4d1530ee, d7721fe607, 4a179fb3c0, 59a1c5d9a7, 2f82d94502, bd2bd0f33b,
e02733df4a, f373ecc96a, 748a327271, 4925b30196, 43c4a229b7, ca112a8b95, 855fa555a3, a237c0797a,
5c46dc702a, 4cadc8113b, 2d6dc83940, ab768f379f, 705e9a9e5e, c17031d3de, 67f2a30d7c, 99732f4084,
5081d837ea, 9e1cb792f4, b6b7ede266, f50e586a4f, 11fedea788, 032b34c377, b421c8e7de, 00eb258a53,
fc6cc80705, 138d20b277, 7c1a9113f9, 07ae297ffd, 4069dbcfca, 03eb50fbac, 056f18bd02
.github/workflows/publish-apt-brew-pkg.yml (vendored, 2 changed lines)

```diff
@@ -32,7 +32,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.7.0
+        uses: svenstaro/upload-release-action@2.11.1
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
```
.github/workflows/publish-binaries.yml (vendored, 8 changed lines)

```diff
@@ -51,7 +51,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.7.0
+        uses: svenstaro/upload-release-action@2.11.1
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
@@ -81,7 +81,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.7.0
+        uses: svenstaro/upload-release-action@2.11.1
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}
@@ -113,7 +113,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.7.0
+        uses: svenstaro/upload-release-action@2.11.1
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
@@ -178,7 +178,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.7.0
+        uses: svenstaro/upload-release-action@2.11.1
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
```
.github/workflows/test-suite.yml (vendored, 10 changed lines)

```diff
@@ -29,7 +29,7 @@ jobs:
       - name: Setup test with Rust stable
         uses: dtolnay/rust-toolchain@1.85
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.8
+        uses: Swatinem/rust-cache@v2.8.0
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -51,7 +51,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.8
+        uses: Swatinem/rust-cache@v2.8.0
       - uses: dtolnay/rust-toolchain@1.85
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
@@ -155,7 +155,7 @@ jobs:
         apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.85
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.8
+        uses: Swatinem/rust-cache@v2.8.0
       - name: Run tests in debug
         uses: actions-rs/cargo@v1
         with:
@@ -172,7 +172,7 @@ jobs:
           profile: minimal
           components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.8
+        uses: Swatinem/rust-cache@v2.8.0
       - name: Run cargo clippy
         uses: actions-rs/cargo@v1
         with:
@@ -191,7 +191,7 @@ jobs:
           override: true
           components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.8
+        uses: Swatinem/rust-cache@v2.8.0
       - name: Run cargo fmt
         # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
         # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
```
.gitignore (vendored, 11 changed lines)

```diff
@@ -11,12 +11,21 @@
 /bench
 /_xtask_benchmark.ms
 /benchmarks
+.DS_Store
 
 # Snapshots
 ## ... large
 *.full.snap
 ## ... unreviewed
 *.snap.new
+## ... pending
+*.pending-snap
+
+# Tmp files
+.tmp*
+
+# Database snapshot
+crates/meilisearch/db.snapshot
 
 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
```
Cargo.lock (generated, 666 changed lines): file diff suppressed because it is too large.
Cargo.toml

```diff
@@ -22,7 +22,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.15.2"
+version = "1.16.0"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
```
crates/benchmarks/Cargo.toml

```diff
@@ -11,27 +11,27 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.98"
-bumpalo = "3.16.0"
+bumpalo = "3.18.1"
 csv = "1.3.1"
 memmap2 = "0.9.5"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.43", default-features = false }
+mimalloc = { version = "0.1.47", default-features = false }
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
-tempfile = "3.15.0"
+tempfile = "3.20.0"
 
 [dev-dependencies]
-criterion = { version = "0.5.1", features = ["html_reports"] }
+criterion = { version = "0.6.0", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.10"
+roaring = "0.10.12"
 
 [build-dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.98"
-bytes = "1.9.0"
+bytes = "1.10.1"
-convert_case = "0.6.0"
+convert_case = "0.8.0"
-flate2 = "1.0.35"
+flate2 = "1.1.2"
-reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }
+reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
 
 [features]
 default = ["milli/all-tokenizations"]
@@ -51,3 +51,8 @@ harness = false
 [[bench]]
 name = "indexing"
 harness = false
+
+[[bench]]
+name = "sort"
+harness = false
+
```
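Both `[[bench]]` targets set `harness = false`, which tells Cargo not to generate its own benchmark `main` and to compile the file as a plain binary; Criterion then supplies the entry point through its macros. A minimal sketch of the shape such a target takes (the `example` name and the trivial closure are illustrative, not from this PR):

```rust
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_example(c: &mut Criterion) {
    // With `harness = false`, this file must provide its own `main`;
    // `criterion_main!` below generates it.
    c.bench_function("example", |b| b.iter(|| std::hint::black_box(1 + 1)));
}

criterion_group!(benches, bench_example);
criterion_main!(benches);
```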
crates/benchmarks/benches/indexing.rs

```diff
@@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn};
 use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
-use milli::vector::EmbeddingConfigs;
+use milli::vector::RuntimeEmbedders;
 use milli::{FilterableAttributesRule, Index};
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
@@ -65,7 +65,7 @@ fn setup_settings<'t>(
     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
     builder.set_sortable_fields(sortable_fields);
 
-    builder.execute(|_| (), || false).unwrap();
+    builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
 }
 
 fn setup_index_with_settings(
@@ -166,9 +166,10 @@ fn indexing_songs_default(c: &mut Criterion) {
         new_fields_ids_map,
         primary_key,
         &document_changes,
-        EmbeddingConfigs::default(),
+        RuntimeEmbedders::default(),
         &|| false,
         &Progress::default(),
+        &Default::default(),
     )
     .unwrap();
 
```

Every other `indexer::index` call site in this file receives the same two-line change shown in the hunk above: `EmbeddingConfigs::default()` becomes `RuntimeEmbedders::default()`, and a new trailing `&Default::default(),` argument is added after `&Progress::default(),`. The remaining hunks, identical apart from their position and enclosing function:

@@ -232,9 +233,10 @@ and @@ -276,9 +278,10 @@ (reindexing_songs_default); @@ -344,9 +347,10 @@ (deleting_songs_in_batches_default); @@ -420,9 +424,10 @@, @@ -464,9 +469,10 @@ and @@ -504,9 +510,10 @@ (indexing_songs_in_three_batches_default); @@ -571,9 +578,10 @@ (indexing_songs_without_faceted_numbers); @@ -637,9 +645,10 @@ (indexing_songs_without_faceted_fields); @@ -703,9 +712,10 @@ (indexing_wiki); @@ -768,9 +778,10 @@ and @@ -812,9 +823,10 @@ (reindexing_wiki); @@ -879,9 +891,10 @@ (deleting_wiki_in_batches_default); @@ -955,9 +968,10 @@, @@ -1000,9 +1014,10 @@ and @@ -1041,9 +1056,10 @@ (indexing_wiki_in_three_batches); @@ -1107,9 +1123,10 @@ (indexing_movies_default); @@ -1172,9 +1189,10 @@ and @@ -1216,9 +1234,10 @@ (reindexing_movies_default); @@ -1283,9 +1302,10 @@ (deleting_movies_in_batches_default); @@ -1331,9 +1351,10 @@ (delete_documents_from_ids, where the context line above the change reads `Some(primary_key)` rather than `primary_key`); @@ -1395,9 +1416,10 @@, @@ -1439,9 +1461,10 @@ and @@ -1479,9 +1502,10 @@ (indexing_movies_in_three_batches); @@ -1568,9 +1592,10 @@ (indexing_nested_movies_default); @@ -1658,9 +1683,10 @@ (deleting_nested_movies_in_batches_default); @@ -1740,9 +1766,10 @@ (indexing_nested_movies_without_faceted_fields); @@ -1806,9 +1833,10 @@ (indexing_geo); @@ -1871,9 +1899,10 @@ and @@ -1915,9 +1944,10 @@ (reindexing_geo); @@ -1982,9 +2012,10 @@ (deleting_geo_in_batches_default).
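The new trailing `&Default::default()` argument compiles without naming a type because inference takes the parameter type from the callee's signature. A self-contained sketch of the pattern (the `IndexingOptions` type and `index_documents` function are hypothetical, purely to illustrate the inference; they are not milli APIs):

```rust
// Hypothetical option type standing in for whatever the new indexer
// parameter actually is; only the `Default` impl matters here.
#[derive(Debug, Default)]
struct IndexingOptions {
    verbose: bool,
}

fn index_documents(opts: &IndexingOptions) {
    println!("indexing with {opts:?}");
}

fn main() {
    // `Default::default()` resolves to `IndexingOptions::default()`
    // from the parameter type of `index_documents`.
    index_documents(&Default::default());
}
```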
crates/benchmarks/benches/sort.rs (new file, 114 lines)

```rust
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.

mod datasets_paths;
mod utils;

use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn base_conf(builder: &mut Settings) {
    let displayed_fields =
        ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_displayed_fields(displayed_fields);

    let sortable_fields =
        ["_geo", "name", "population", "elevation", "timezone", "modification-date"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_sortable_fields(sortable_fields);
}

#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
    dataset: datasets_paths::SMOL_ALL_COUNTRIES,
    dataset_format: "jsonl",
    configure: base_conf,
    primary_key: Some("geonameid"),
    queries: &[""],
    offsets: &[
        Some((0, 20)), // The most common query in the real world
        Some((0, 500)), // A query that ranges over many documents
        Some((980, 20)), // The worst query that could happen in the real world
        Some((800_000, 20)) // The worst query
    ],
    get_documents: true,
    ..Conf::BASE
};

fn bench_sort(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        utils::Conf {
            group_name: "without sort",
            sort: None,
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values",
            sort: Some(vec!["name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values",
            sort: Some(vec!["timezone:desc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar then different values",
            sort: Some(vec!["timezone:desc", "name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different then similar values",
            sort: Some(vec!["timezone:desc", "name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "geo sort",
            sample_size: Some(10),
            sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many fields",
            sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
            ..BASE_CONF
        },
    ];

    utils::run_benches(c, confs);
}

criterion_group!(benches, bench_sort);
criterion_main!(benches);
```
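The geo-sort configurations above lower `sample_size` because each iteration is comparatively slow. In Criterion this maps to a per-group setting (the default is 100 samples, and Criterion rejects values below 10); a minimal sketch, with an illustrative `noop` benchmark standing in for the real sort workload:

```rust
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_with_small_sample(c: &mut Criterion) {
    let mut group = c.benchmark_group("geo sort (sketch)");
    // The `sample_size: Some(10)` knob in `Conf` forwards to this call.
    group.sample_size(10);
    group.bench_function("noop", |b| b.iter(|| std::hint::black_box(2 + 2)));
    group.finish();
}

criterion_group!(benches, bench_with_small_sample);
criterion_main!(benches);
```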
crates/benchmarks/benches/utils.rs

```diff
@@ -9,11 +9,12 @@ use anyhow::Context;
 use bumpalo::Bump;
 use criterion::BenchmarkId;
 use memmap2::Mmap;
+use milli::documents::sort::recursive_sort;
 use milli::heed::EnvOpenOptions;
 use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
-use milli::vector::EmbeddingConfigs;
+use milli::vector::RuntimeEmbedders;
 use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
 use serde_json::Value;
 
@@ -35,6 +36,12 @@ pub struct Conf<'a> {
     pub configure: fn(&mut Settings),
     pub filter: Option<&'a str>,
     pub sort: Option<Vec<&'a str>>,
+    /// set to skip documents (offset, limit)
+    pub offsets: &'a [Option<(usize, usize)>],
+    /// enable if you want to bench getting documents without querying
+    pub get_documents: bool,
+    /// configure the benchmark sample size
+    pub sample_size: Option<usize>,
     /// enable or disable the optional words on the query
     pub optional_words: bool,
     /// primary key, if there is None we'll auto-generate docids for every documents
@@ -52,6 +59,9 @@ impl Conf<'_> {
         configure: |_| (),
         filter: None,
         sort: None,
+        offsets: &[None],
+        get_documents: false,
+        sample_size: None,
         optional_words: true,
         primary_key: None,
     };
@@ -90,7 +100,7 @@ pub fn base_setup(conf: &Conf) -> Index {
 
     (conf.configure)(&mut builder);
 
-    builder.execute(|_| (), || false).unwrap();
+    builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
     wtxn.commit().unwrap();
 
     let config = IndexerConfig::default();
@@ -125,9 +135,10 @@ pub fn base_setup(conf: &Conf) -> Index {
         new_fields_ids_map,
         primary_key,
         &document_changes,
-        EmbeddingConfigs::default(),
+        RuntimeEmbedders::default(),
         &|| false,
         &Progress::default(),
+        &Default::default(),
     )
     .unwrap();
 
@@ -144,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
     let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
     let name = format!("{}: {}", file_name, conf.group_name);
     let mut group = c.benchmark_group(&name);
+    if let Some(sample_size) = conf.sample_size {
+        group.sample_size(sample_size);
+    }
 
     for &query in conf.queries {
-        group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
-            b.iter(|| {
-                let rtxn = index.read_txn().unwrap();
-                let mut search = index.search(&rtxn);
-                search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
-                if let Some(filter) = conf.filter {
-                    let filter = Filter::from_str(filter).unwrap().unwrap();
-                    search.filter(filter);
-                }
-                if let Some(sort) = &conf.sort {
-                    let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
-                    search.sort_criteria(sort);
-                }
-                let _ids = search.execute().unwrap();
-            });
-        });
+        for offset in conf.offsets {
+            let parameter = match offset {
+                None => query.to_string(),
+                Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
+            };
+            group.bench_with_input(
+                BenchmarkId::from_parameter(parameter),
+                &query,
+                |b, &query| {
+                    b.iter(|| {
+                        let rtxn = index.read_txn().unwrap();
+                        let mut search = index.search(&rtxn);
+                        search
+                            .query(query)
+                            .terms_matching_strategy(TermsMatchingStrategy::default());
+                        if let Some(filter) = conf.filter {
+                            let filter = Filter::from_str(filter).unwrap().unwrap();
+                            search.filter(filter);
+                        }
+                        if let Some(sort) = &conf.sort {
+                            let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
+                            search.sort_criteria(sort);
+                        }
+                        if let Some((offset, limit)) = offset {
+                            search.offset(*offset).limit(*limit);
+                        }
+
+                        let _ids = search.execute().unwrap();
+                    });
+                },
+            );
+        }
     }
+
+    if conf.get_documents {
+        for offset in conf.offsets {
+            let parameter = match offset {
+                None => String::from("get_documents"),
+                Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
+            };
+            group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
+                b.iter(|| {
+                    let rtxn = index.read_txn().unwrap();
+                    if let Some(sort) = &conf.sort {
+                        let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
+                        let all_docs = index.documents_ids(&rtxn).unwrap();
+                        let facet_sort =
+                            recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
+                        let iter = facet_sort.iter().unwrap();
+                        if let Some((offset, limit)) = offset {
+                            let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
+                        } else {
+                            let _results = iter.collect::<Vec<_>>();
+                        }
+                    } else {
+                        let all_docs = index.documents_ids(&rtxn).unwrap();
+                        if let Some((offset, limit)) = offset {
+                            let _results =
+                                all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
+                        } else {
+                            let _results = all_docs.iter().collect::<Vec<_>>();
+                        }
+                    }
+                });
+            });
+        }
+    }
+
     group.finish();
 
     index.prepare_for_closing().wait();
```
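The `get_documents` path above ranks the full document-id set and then windows it with `skip`/`take` instead of asking the search pipeline for a page. The windowing itself is plain iterator arithmetic; a self-contained sketch (the `window` helper is illustrative, not a function from this PR):

```rust
// `offset` mirrors `Conf::offsets`: None means "take everything",
// Some((offset, limit)) means "skip `offset` ids, keep `limit`".
fn window(iter: impl Iterator<Item = u32>, offset: Option<(usize, usize)>) -> Vec<u32> {
    match offset {
        Some((offset, limit)) => iter.skip(offset).take(limit).collect(),
        None => iter.collect(),
    }
}

fn main() {
    let ids = || 0u32..1_000;
    // "The worst query that could happen in the real world": page 980..1000.
    assert_eq!(window(ids(), Some((980, 20))).len(), 20);
    assert_eq!(window(ids(), None).len(), 1_000);
    println!("windowing ok");
}
```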
crates/benchmarks/build.rs

```diff
@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
         writeln!(
             &mut manifest_paths_file,
             r#"pub const {}: &str = {:?};"#,
-            dataset.to_case(Case::ScreamingSnake),
+            dataset.to_case(Case::UpperSnake),
             out_file.display(),
         )?;
 
```
crates/build-info/Cargo.toml

```diff
@@ -11,8 +11,8 @@ license.workspace = true
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-time = { version = "0.3.37", features = ["parsing"] }
+time = { version = "0.3.41", features = ["parsing"] }
 
 [build-dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.98"
-vergen-git2 = "1.0.2"
+vergen-git2 = "1.0.7"
```
crates/dump/Cargo.toml

```diff
@@ -11,21 +11,21 @@ readme.workspace = true
 license.workspace = true
 
 [dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.98"
-flate2 = "1.0.35"
+flate2 = "1.1.2"
-http = "1.2.0"
+http = "1.3.1"
 meilisearch-types = { path = "../meilisearch-types" }
-once_cell = "1.20.2"
+once_cell = "1.21.3"
 regex = "1.11.1"
-roaring = { version = "0.10.10", features = ["serde"] }
+roaring = { version = "0.10.12", features = ["serde"] }
-serde = { version = "1.0.217", features = ["derive"] }
+serde = { version = "1.0.219", features = ["derive"] }
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
-tar = "0.4.43"
+tar = "0.4.44"
-tempfile = "3.15.0"
+tempfile = "3.20.0"
-thiserror = "2.0.9"
+thiserror = "2.0.12"
-time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tracing = "0.1.41"
-uuid = { version = "1.11.0", features = ["serde", "v4"] }
+uuid = { version = "1.17.0", features = ["serde", "v4"] }
 
 [dev-dependencies]
 big_s = "1.0.2"
```
crates/dump/src/lib.rs

```diff
@@ -1,12 +1,17 @@
 #![allow(clippy::type_complexity)]
 #![allow(clippy::wrong_self_convention)]
 
+use std::collections::BTreeMap;
+
 use meilisearch_types::batches::BatchId;
+use meilisearch_types::byte_unit::Byte;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::keys::Key;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::settings::Unchecked;
-use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
+use meilisearch_types::tasks::{
+    Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
+};
 use meilisearch_types::InstanceUid;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
@@ -141,6 +146,12 @@ pub enum KindDump {
         instance_uid: Option<InstanceUid>,
     },
     SnapshotCreation,
+    Export {
+        url: String,
+        api_key: Option<String>,
+        payload_size: Option<Byte>,
+        indexes: BTreeMap<String, ExportIndexSettings>,
+    },
     UpgradeDatabase {
         from: (u32, u32, u32),
     },
@@ -213,6 +224,15 @@ impl From<KindWithContent> for KindDump {
                 KindDump::DumpCreation { keys, instance_uid }
             }
             KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
+            KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
+                url,
+                api_key,
+                payload_size,
+                indexes: indexes
+                    .into_iter()
+                    .map(|(pattern, settings)| (pattern.to_string(), settings))
+                    .collect(),
+            },
             KindWithContent::UpgradeDatabase { from: version } => {
                 KindDump::UpgradeDatabase { from: version }
             }
@@ -329,6 +349,7 @@ pub(crate) mod test {
                 write_channel_congestion: None,
                 internal_database_sizes: Default::default(),
             },
+            embedder_stats: Default::default(),
             enqueued_at: Some(BatchEnqueuedAt {
                 earliest: datetime!(2022-11-11 0:00 UTC),
                 oldest: datetime!(2022-11-11 0:00 UTC),
```
crates/dump/src/reader/mod.rs

```diff
@@ -116,6 +116,15 @@ impl DumpReader {
         }
     }
 
+    pub fn chat_completions_settings(
+        &mut self,
+    ) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
+        match self {
+            DumpReader::Current(current) => current.chat_completions_settings(),
+            DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
+        }
+    }
+
     pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
         match self {
             DumpReader::Current(current) => Ok(current.features()),
```
crates/dump/src/reader/v6/mod.rs

```diff
@@ -1,3 +1,4 @@
+use std::ffi::OsStr;
 use std::fs::{self, File};
 use std::io::{BufRead, BufReader, ErrorKind};
 use std::path::Path;
@@ -21,6 +22,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
 pub type Task = crate::TaskDump;
 pub type Batch = meilisearch_types::batches::Batch;
 pub type Key = meilisearch_types::keys::Key;
+pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
 pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
 pub type Network = meilisearch_types::features::Network;
 
@@ -192,6 +194,34 @@ impl V6Reader {
         )
     }
 
+    pub fn chat_completions_settings(
+        &mut self,
+    ) -> Result<Box<dyn Iterator<Item = Result<(String, ChatCompletionSettings)>> + '_>> {
+        let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) {
+            Ok(entries) => entries,
+            Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())),
+            Err(e) => return Err(e.into()),
+        };
+        Ok(Box::new(
+            entries
+                .map(|entry| -> Result<Option<_>> {
+                    let entry = entry?;
+                    let file_name = entry.file_name();
+                    let path = Path::new(&file_name);
+                    if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json"))
+                    {
+                        let name = path.file_stem().unwrap().to_str().unwrap().to_string();
+                        let file = File::open(entry.path())?;
+                        let settings = serde_json::from_reader(file)?;
+                        Ok(Some((name, settings)))
+                    } else {
+                        Ok(None)
+                    }
+                })
+                .filter_map(|entry| entry.transpose()),
+        ))
+    }
+
     pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
         self.features
     }
```
crates/dump/src/writer.rs

```diff
@@ -5,7 +5,7 @@ use std::path::PathBuf;
 use flate2::write::GzEncoder;
 use flate2::Compression;
 use meilisearch_types::batches::Batch;
-use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
+use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
 use meilisearch_types::keys::Key;
 use meilisearch_types::settings::{Checked, Settings};
 use serde_json::{Map, Value};
@@ -51,6 +51,10 @@ impl DumpWriter {
         KeyWriter::new(self.dir.path().to_path_buf())
     }
 
+    pub fn create_chat_completions_settings(&self) -> Result<ChatCompletionsSettingsWriter> {
+        ChatCompletionsSettingsWriter::new(self.dir.path().join("chat-completions-settings"))
+    }
+
     pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
         TaskWriter::new(self.dir.path().join("tasks"))
     }
@@ -104,6 +108,24 @@ impl KeyWriter {
     }
 }
 
+pub struct ChatCompletionsSettingsWriter {
+    path: PathBuf,
+}
+
+impl ChatCompletionsSettingsWriter {
+    pub(crate) fn new(path: PathBuf) -> Result<Self> {
+        std::fs::create_dir(&path)?;
+        Ok(ChatCompletionsSettingsWriter { path })
+    }
+
+    pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> {
+        let mut settings_file = File::create(self.path.join(name).with_extension("json"))?;
+        serde_json::to_writer(&mut settings_file, &settings)?;
+        settings_file.flush()?;
+        Ok(())
+    }
+}
+
 pub struct TaskWriter {
     queue: BufWriter<File>,
     update_files: PathBuf,
```
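Taken together, `ChatCompletionsSettingsWriter::push_settings` and `V6Reader::chat_completions_settings` agree on a `chat-completions-settings/{name}.json` layout inside the dump directory. A std-only sketch of that round trip (the temp path, the `assistant` name, and the JSON body are illustrative):

```rust
use std::ffi::OsStr;
use std::fs;
use std::path::Path;

fn main() -> std::io::Result<()> {
    // Writer side: one `{name}.json` file per chat-completions setting.
    let dir = Path::new("/tmp/dump-sketch/chat-completions-settings");
    fs::create_dir_all(dir)?;
    fs::write(dir.join("assistant").with_extension("json"), r#"{"model":"example"}"#)?;

    // Reader side: iterate the directory, keep `.json` files, and recover
    // the setting name from the file stem.
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.extension() == Some(OsStr::new("json")) {
            let name = path.file_stem().unwrap().to_string_lossy();
            let raw = fs::read_to_string(&path)?;
            println!("settings `{name}`: {raw}");
        }
    }
    Ok(())
}
```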
crates/file-store/Cargo.toml

```diff
@@ -11,7 +11,7 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-tempfile = "3.15.0"
+tempfile = "3.20.0"
-thiserror = "2.0.9"
+thiserror = "2.0.12"
 tracing = "0.1.41"
-uuid = { version = "1.11.0", features = ["serde", "v4"] }
+uuid = { version = "1.17.0", features = ["serde", "v4"] }
```
crates/filter-parser/Cargo.toml

```diff
@@ -14,7 +14,7 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
 nom_locate = "4.2.0"
-unescaper = "0.1.5"
+unescaper = "0.1.6"
 
 [dev-dependencies]
 # fixed version due to format breakages in v1.40
```
crates/json-depth-checker/Cargo.toml (file name inferred from the dependency set and bench layout)

```diff
@@ -16,7 +16,7 @@ license.workspace = true
 serde_json = "1.0"
 
 [dev-dependencies]
-criterion = { version = "0.5.1", features = ["html_reports"] }
+criterion = { version = "0.6.0", features = ["html_reports"] }
 
 [[bench]]
 name = "benchmarks"
```
crates/fuzzers/Cargo.toml

```diff
@@ -12,11 +12,11 @@ license.workspace = true
 
 [dependencies]
 arbitrary = { version = "1.4.1", features = ["derive"] }
-bumpalo = "3.16.0"
+bumpalo = "3.18.1"
-clap = { version = "4.5.24", features = ["derive"] }
+clap = { version = "4.5.40", features = ["derive"] }
-either = "1.13.0"
+either = "1.15.0"
 fastrand = "2.3.0"
 milli = { path = "../milli" }
-serde = { version = "1.0.217", features = ["derive"] }
+serde = { version = "1.0.219", features = ["derive"] }
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
-tempfile = "3.15.0"
+tempfile = "3.20.0"
```
crates/fuzzers/src/bin/fuzz-indexing.rs

```diff
@@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
 use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::IndexerConfig;
-use milli::vector::EmbeddingConfigs;
+use milli::vector::RuntimeEmbedders;
 use milli::Index;
 use serde_json::Value;
 use tempfile::TempDir;
@@ -89,7 +89,7 @@ fn main() {
     let mut new_fields_ids_map = db_fields_ids_map.clone();
 
     let indexer_alloc = Bump::new();
-    let embedders = EmbeddingConfigs::default();
+    let embedders = RuntimeEmbedders::default();
     let mut indexer = indexer::DocumentOperation::new();
 
     let mut operations = Vec::new();
@@ -144,6 +144,7 @@ fn main() {
         embedders,
         &|| false,
         &Progress::default(),
+        &Default::default(),
     )
     .unwrap();
 
```
@@ -11,31 +11,31 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.98"
 bincode = "1.3.3"
 byte-unit = "5.1.6"
-bumpalo = "3.16.0"
+bumpalo = "3.18.1"
 bumparaw-collections = "0.1.4"
-convert_case = "0.6.0"
+convert_case = "0.8.0"
 csv = "1.3.1"
 derive_builder = "0.20.2"
 dump = { path = "../dump" }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.35"
-indexmap = "2.7.0"
+flate2 = "1.1.2"
+indexmap = "2.9.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 memmap2 = "0.9.5"
 page_size = "0.6.0"
 rayon = "1.10.0"
-roaring = { version = "0.10.10", features = ["serde"] }
-serde = { version = "1.0.217", features = ["derive"] }
-serde_json = { version = "1.0.138", features = ["preserve_order"] }
+roaring = { version = "0.10.12", features = ["serde"] }
+serde = { version = "1.0.219", features = ["derive"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
 synchronoise = "1.0.1"
-tempfile = "3.15.0"
-thiserror = "2.0.9"
-time = { version = "0.3.37", features = [
+tempfile = "3.20.0"
+thiserror = "2.0.12"
+time = { version = "0.3.41", features = [
 "serde-well-known",
 "formatting",
 "parsing",
@@ -43,7 +43,8 @@ time = { version = "0.3.37", features = [
 ] }
 tracing = "0.1.41"
 ureq = "2.12.1"
-uuid = { version = "1.11.0", features = ["serde", "v4"] }
+uuid = { version = "1.17.0", features = ["serde", "v4"] }
+backoff = "0.4.0"
 
 [dev-dependencies]
 big_s = "1.0.2"
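The new `backoff` dependency above powers the retry loop that the export code introduces further down in this changeset. A minimal sketch of how `backoff::retry` treats transient versus permanent failures, assuming backoff 0.4; the `flaky_call` helper and its error payloads are made up for illustration:

    use std::time::Duration;
    use backoff::ExponentialBackoff;

    fn flaky_call(attempt: &mut u32) -> Result<&'static str, backoff::Error<&'static str>> {
        *attempt += 1;
        if *attempt < 3 {
            // Transient errors are retried with an exponentially growing delay.
            Err(backoff::Error::Transient { err: "try again", retry_after: None })
        } else {
            Ok("done")
        }
    }

    fn main() {
        let mut attempt = 0;
        let backoff = ExponentialBackoff {
            max_elapsed_time: Some(Duration::from_secs(5)),
            ..ExponentialBackoff::default()
        };
        let result = backoff::retry(backoff, || flaky_call(&mut attempt));
        println!("{result:?} after {attempt} attempts");
    }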
@@ -4,6 +4,7 @@ use std::io;
 use dump::{KindDump, TaskDump, UpdateFile};
 use meilisearch_types::batches::{Batch, BatchId};
 use meilisearch_types::heed::RwTxn;
+use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::milli;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use roaring::RoaringBitmap;
@@ -211,6 +212,23 @@ impl<'a> Dump<'a> {
 KindWithContent::DumpCreation { keys, instance_uid }
 }
 KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
+KindDump::Export { url, api_key, payload_size, indexes } => {
+    KindWithContent::Export {
+        url,
+        api_key,
+        payload_size,
+        indexes: indexes
+            .into_iter()
+            .map(|(pattern, settings)| {
+                Ok((
+                    IndexUidPattern::try_from(pattern)
+                        .map_err(|_| Error::CorruptedDump)?,
+                    settings,
+                ))
+            })
+            .collect::<Result<_, Error>>()?,
+    }
+}
 KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
 },
 };
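The `KindDump::Export` arm above validates every dumped index pattern with a fallible `collect`: mapping each element to a `Result` and collecting into `Result<_, E>` short-circuits on the first error. The same idiom in a standalone sketch; `parse_all` and its inputs are illustrative:

    fn parse_all(raw: &[&str]) -> Result<Vec<u32>, std::num::ParseIntError> {
        // Collecting an iterator of `Result`s into `Result<Vec<_>, _>`
        // stops at the first `Err`, like the dump import above.
        raw.iter().map(|s| s.parse::<u32>()).collect()
    }

    fn main() {
        assert_eq!(parse_all(&["1", "2", "3"]), Ok(vec![1, 2, 3]));
        assert!(parse_all(&["1", "oops"]).is_err());
    }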
@@ -151,6 +151,10 @@ pub enum Error {
 CorruptedTaskQueue,
 #[error(transparent)]
 DatabaseUpgrade(Box<Self>),
+#[error(transparent)]
+Export(Box<Self>),
+#[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")]
+FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String },
 #[error("Failed to rollback for index `{index}`: {rollback_outcome} ")]
 RollbackFailed { index: String, rollback_outcome: RollbackOutcome },
 #[error(transparent)]
@@ -212,6 +216,7 @@ impl Error {
 | Error::BatchNotFound(_)
 | Error::TaskDeletionWithEmptyQuery
 | Error::TaskCancelationWithEmptyQuery
+| Error::FromRemoteWhenExporting { .. }
 | Error::AbortedTask
 | Error::Dump(_)
 | Error::Heed(_)
@@ -221,6 +226,7 @@ impl Error {
 | Error::IoError(_)
 | Error::Persist(_)
 | Error::FeatureNotEnabled(_)
+| Error::Export(_)
 | Error::Anyhow(_) => true,
 Error::CreateBatch(_)
 | Error::CorruptedTaskQueue
@@ -282,6 +288,7 @@ impl ErrorCode for Error {
 Error::Dump(e) => e.error_code(),
 Error::Milli { error, .. } => error.error_code(),
 Error::ProcessBatchPanicked(_) => Code::Internal,
+Error::FromRemoteWhenExporting { .. } => Code::Internal,
 Error::Heed(e) => e.error_code(),
 Error::HeedTransaction(e) => e.error_code(),
 Error::FileStore(e) => e.error_code(),
@@ -294,6 +301,7 @@ impl ErrorCode for Error {
 Error::CorruptedTaskQueue => Code::Internal,
 Error::CorruptedDump => Code::Internal,
 Error::DatabaseUpgrade(_) => Code::Internal,
+Error::Export(_) => Code::Internal,
 Error::RollbackFailed { .. } => Code::Internal,
 Error::UnrecoverableError(_) => Code::Internal,
 Error::IndexSchedulerVersionMismatch { .. } => Code::Internal,
@@ -144,6 +144,19 @@ impl RoFeatures {
 .into())
 }
 }
 
+pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> {
+    if self.runtime.multimodal {
+        Ok(())
+    } else {
+        Err(FeatureNotEnabledError {
+            disabled_action,
+            feature: "multimodal",
+            issue_link: "https://github.com/orgs/meilisearch/discussions/846",
+        }
+        .into())
+    }
+}
 }
 
 impl FeatureData {
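Call sites would gate multimodal-only code paths on this check the same way `check_chat_completions` is used later in this changeset. A hypothetical, self-contained sketch of the pattern; the `RuntimeFlags` type and error string are stand-ins, not the real types:

    struct RuntimeFlags { multimodal: bool }

    fn check_multimodal(flags: &RuntimeFlags, disabled_action: &'static str) -> Result<(), String> {
        if flags.multimodal {
            Ok(())
        } else {
            Err(format!("{disabled_action} requires the experimental `multimodal` feature"))
        }
    }

    fn main() {
        let flags = RuntimeFlags { multimodal: false };
        // A hypothetical call site: gate a media-aware search behind the flag.
        match check_multimodal(&flags, "performing a multimodal search") {
            Ok(()) => println!("feature enabled"),
            Err(e) => println!("refused: {e}"),
        }
    }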
@@ -289,6 +289,9 @@ fn snapshot_details(d: &Details) -> String {
 Details::IndexSwap { swaps } => {
 format!("{{ swaps: {swaps:?} }}")
 }
+Details::Export { url, api_key, payload_size, indexes } => {
+    format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
+}
 Details::UpgradeDatabase { from, to } => {
 format!("{{ from: {from:?}, to: {to:?} }}")
 }
@@ -343,6 +346,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
 uid,
 details,
 stats,
+embedder_stats,
 started_at,
 finished_at,
 progress: _,
@@ -366,6 +370,12 @@ pub fn snapshot_batch(batch: &Batch) -> String {
 snap.push_str(&format!("uid: {uid}, "));
 snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
 snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
+if !embedder_stats.skip_serializing() {
+    snap.push_str(&format!(
+        "embedder stats: {}, ",
+        serde_json::to_string(&embedder_stats).unwrap()
+    ));
+}
 snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap()));
 snap.push('}');
 snap
@@ -57,12 +57,15 @@ use meilisearch_types::features::{
 use meilisearch_types::heed::byteorder::BE;
 use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
 use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
-use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexerConfig;
-use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
+use meilisearch_types::milli::vector::json_template::JsonTemplate;
+use meilisearch_types::milli::vector::{
+    Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
+};
 use meilisearch_types::milli::{self, Index};
 use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{KindWithContent, Task};
+use milli::vector::db::IndexEmbeddingConfig;
 use processing::ProcessingTasks;
 pub use queue::Query;
 use queue::Queue;
@@ -851,29 +854,42 @@ impl IndexScheduler {
 &self,
 index_uid: String,
 embedding_configs: Vec<IndexEmbeddingConfig>,
-) -> Result<EmbeddingConfigs> {
+) -> Result<RuntimeEmbedders> {
 let res: Result<_> = embedding_configs
 .into_iter()
 .map(
 |IndexEmbeddingConfig {
 name,
 config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
-..
-}| {
-let prompt = Arc::new(
-prompt
+fragments,
+}|
+-> Result<(String, Arc<RuntimeEmbedder>)> {
+let document_template = prompt
 .try_into()
 .map_err(meilisearch_types::milli::Error::from)
-.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
-);
+.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+
+let fragments = fragments
+    .into_inner()
+    .into_iter()
+    .map(|fragment| {
+        let value = embedder_options.fragment(&fragment.name).unwrap();
+        let template = JsonTemplate::new(value.clone()).unwrap();
+        RuntimeFragment { name: fragment.name, id: fragment.id, template }
+    })
+    .collect();
 // optimistically return existing embedder
 {
 let embedders = self.embedders.read().unwrap();
 if let Some(embedder) = embedders.get(&embedder_options) {
-return Ok((
-name,
-(embedder.clone(), prompt, quantized.unwrap_or_default()),
+let runtime = Arc::new(RuntimeEmbedder::new(
+embedder.clone(),
+document_template,
+fragments,
+quantized.unwrap_or_default(),
 ));
+
+return Ok((name, runtime));
 }
 }
 
@@ -889,11 +905,19 @@ impl IndexScheduler {
 let mut embedders = self.embedders.write().unwrap();
 embedders.insert(embedder_options, embedder.clone());
 }
-Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
+
+let runtime = Arc::new(RuntimeEmbedder::new(
+    embedder.clone(),
+    document_template,
+    fragments,
+    quantized.unwrap_or_default(),
+));
+
+Ok((name, runtime))
 },
 )
 .collect();
-res.map(EmbeddingConfigs::new)
+res.map(RuntimeEmbedders::new)
 }
 
 pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {
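The rewritten `embedders` function keeps its optimistic read-then-write locking: take the cheap read lock first, and only fall back to the write lock when the embedder is missing from the cache. A self-contained sketch of that double-checked caching pattern; the `Cache` type and string values are simplified stand-ins:

    use std::collections::HashMap;
    use std::sync::{Arc, RwLock};

    struct Cache {
        inner: RwLock<HashMap<String, Arc<String>>>,
    }

    impl Cache {
        fn get_or_build(&self, key: &str) -> Arc<String> {
            // Optimistic fast path: many readers can hold the lock at once.
            {
                let read = self.inner.read().unwrap();
                if let Some(hit) = read.get(key) {
                    return Arc::clone(hit);
                }
            }
            // Slow path: build outside any lock, then insert under the write lock.
            let built = Arc::new(format!("embedder for {key}"));
            let mut write = self.inner.write().unwrap();
            write.entry(key.to_string()).or_insert_with(|| Arc::clone(&built)).clone()
        }
    }

    fn main() {
        let cache = Cache { inner: RwLock::new(HashMap::new()) };
        let a = cache.get_or_build("default");
        let b = cache.get_or_build("default");
        assert!(Arc::ptr_eq(&a, &b));
    }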
@@ -103,6 +103,7 @@ make_enum_progress! {
 pub enum DumpCreationProgress {
 StartTheDumpCreation,
 DumpTheApiKeys,
+DumpTheChatCompletionSettings,
 DumpTheTasks,
 DumpTheBatches,
 DumpTheIndexes,
@@ -175,8 +176,17 @@ make_enum_progress! {
 }
 }
 
+make_enum_progress! {
+    pub enum Export {
+        EnsuringCorrectnessOfTheTarget,
+        ExportingTheSettings,
+        ExportingTheDocuments,
+    }
+}
+
 make_atomic_progress!(Task alias AtomicTaskStep => "task" );
 make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
+make_atomic_progress!(Index alias AtomicIndexStep => "index" );
 make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
 make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
 
@@ -179,6 +179,7 @@ impl BatchQueue {
 progress: None,
 details: batch.details,
 stats: batch.stats,
+embedder_stats: batch.embedder_stats.as_ref().into(),
 started_at: batch.started_at,
 finished_at: batch.finished_at,
 enqueued_at: batch.enqueued_at,
@@ -71,6 +71,7 @@ impl From<KindWithContent> for AutobatchKind {
 KindWithContent::TaskCancelation { .. }
 | KindWithContent::TaskDeletion { .. }
 | KindWithContent::DumpCreation { .. }
+| KindWithContent::Export { .. }
 | KindWithContent::UpgradeDatabase { .. }
 | KindWithContent::SnapshotCreation => {
 panic!("The autobatcher should never be called with tasks that don't apply to an index.")
@@ -1,4 +1,5 @@
 use std::fmt;
+use std::io::ErrorKind;
 
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
@@ -47,6 +48,9 @@ pub(crate) enum Batch {
 IndexSwap {
 task: Task,
 },
+Export {
+    task: Task,
+},
 UpgradeDatabase {
 tasks: Vec<Task>,
 },
@@ -103,6 +107,7 @@ impl Batch {
 Batch::TaskCancelation { task, .. }
 | Batch::Dump(task)
 | Batch::IndexCreation { task, .. }
+| Batch::Export { task }
 | Batch::IndexUpdate { task, .. } => {
 RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
 }
@@ -142,6 +147,7 @@ impl Batch {
 | TaskDeletions(_)
 | SnapshotCreation(_)
 | Dump(_)
+| Export { .. }
 | UpgradeDatabase { .. }
 | IndexSwap { .. } => None,
 IndexOperation { op, .. } => Some(op.index_uid()),
@@ -167,6 +173,7 @@ impl fmt::Display for Batch {
 Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
 Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
 Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
+Batch::Export { .. } => f.write_str("Export")?,
 Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
 };
 match index_uid {
@@ -426,9 +433,10 @@ impl IndexScheduler {
 /// 0. We get the *last* task to cancel.
 /// 1. We get the tasks to upgrade.
 /// 2. We get the *next* task to delete.
-/// 3. We get the *next* snapshot to process.
-/// 4. We get the *next* dump to process.
-/// 5. We get the *next* tasks to process for a specific index.
+/// 3. We get the *next* export to process.
+/// 4. We get the *next* snapshot to process.
+/// 5. We get the *next* dump to process.
+/// 6. We get the *next* tasks to process for a specific index.
 #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
 pub(crate) fn create_next_batch(
 &self,
@@ -500,7 +508,17 @@ impl IndexScheduler {
 return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
 }
 
-// 3. we batch the snapshot.
+// 3. we batch the export.
+let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
+if !to_export.is_empty() {
+    let task_id = to_export.iter().next().expect("There must be at least one export task");
+    let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
+    current_batch.processing([&mut task]);
+    current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
+    return Ok(Some((Batch::Export { task }, current_batch)));
+}
+
+// 4. we batch the snapshot.
 let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
 if !to_snapshot.is_empty() {
 let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@@ -510,7 +528,7 @@ impl IndexScheduler {
 return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
 }
 
-// 4. we batch the dumps.
+// 5. we batch the dumps.
 let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
 if let Some(to_dump) = to_dump.min() {
 let mut task =
@@ -523,7 +541,7 @@ impl IndexScheduler {
 return Ok(Some((Batch::Dump(task), current_batch)));
 }
 
-// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
+// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
 let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
 let mut task =
 self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
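Export tasks slot into the priority ladder right after task deletions: each step intersects the set of enqueued task ids with the ids of one task kind, and the first non-empty intersection decides the batch. A reduced sketch of that selection with `roaring`; the kinds and ids here are made up:

    use roaring::RoaringBitmap;

    fn main() {
        let enqueued = RoaringBitmap::from_iter([1u32, 2, 3, 4]);
        let exports = RoaringBitmap::from_iter([3u32]);
        let snapshots = RoaringBitmap::from_iter([4u32]);

        // Intersect enqueued ids with each kind, in priority order.
        for (kind, ids) in [("export", &exports), ("snapshot", &snapshots)] {
            let ready = ids & &enqueued;
            if let Some(task_id) = ready.min() {
                println!("next batch: {kind} task {task_id}");
                break;
            }
        }
    }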
@@ -577,7 +595,11 @@ impl IndexScheduler {
 .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
 
 if let Some(uuid) = task.content_uuid() {
-let content_size = self.queue.file_store.compute_size(uuid)?;
+let content_size = match self.queue.file_store.compute_size(uuid) {
+    Ok(content_size) => content_size,
+    Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
+    Err(otherwise) => return Err(otherwise.into()),
+};
 total_size = total_size.saturating_add(content_size);
 }
 
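Treating a missing update file as zero bytes keeps the size accounting from failing on content that was already cleaned up. The same `ErrorKind::NotFound` pattern in miniature; the file paths are placeholders:

    use std::io::ErrorKind;

    fn file_size_or_zero(path: &str) -> std::io::Result<u64> {
        match std::fs::metadata(path) {
            Ok(meta) => Ok(meta.len()),
            // A vanished file contributes nothing to the total instead of erroring.
            Err(err) if err.kind() == ErrorKind::NotFound => Ok(0),
            Err(err) => Err(err),
        }
    }

    fn main() -> std::io::Result<()> {
        let total: u64 = ["a.bin", "b.bin"]
            .iter()
            .map(|p| file_size_or_zero(p))
            .sum::<std::io::Result<u64>>()?;
        println!("{total} bytes");
        Ok(())
    }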
@@ -4,6 +4,7 @@ mod autobatcher_test;
 mod create_batch;
 mod process_batch;
 mod process_dump_creation;
+mod process_export;
 mod process_index_operation;
 mod process_snapshot_creation;
 mod process_upgrade;
@@ -162,8 +162,13 @@ impl IndexScheduler {
 .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
 
 let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
-let (tasks, congestion) =
-self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
+let (tasks, congestion) = self.apply_index_operation(
+    &mut index_wtxn,
+    &index,
+    op,
+    &progress,
+    current_batch.embedder_stats.clone(),
+)?;
 
 {
 progress.update_progress(FinalizingIndexStep::Committing);
@@ -238,10 +243,12 @@ impl IndexScheduler {
 );
 builder.set_primary_key(primary_key);
 let must_stop_processing = self.scheduler.must_stop_processing.clone();
 
 builder
 .execute(
-|indexing_step| tracing::debug!(update = ?indexing_step),
-|| must_stop_processing.get(),
+&|| must_stop_processing.get(),
+&progress,
+current_batch.embedder_stats.clone(),
 )
 .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
 index_wtxn.commit()?;
@@ -361,6 +368,46 @@ impl IndexScheduler {
 task.status = Status::Succeeded;
 Ok((vec![task], ProcessBatchInfo::default()))
 }
+Batch::Export { mut task } => {
+    let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
+    else {
+        unreachable!()
+    };
+
+    let ret = catch_unwind(AssertUnwindSafe(|| {
+        self.process_export(
+            url,
+            api_key.as_deref(),
+            payload_size.as_ref(),
+            indexes,
+            progress,
+        )
+    }));
+
+    let stats = match ret {
+        Ok(Ok(stats)) => stats,
+        Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
+        Ok(Err(e)) => return Err(Error::Export(Box::new(e))),
+        Err(e) => {
+            let msg = match e.downcast_ref::<&'static str>() {
+                Some(s) => *s,
+                None => match e.downcast_ref::<String>() {
+                    Some(s) => &s[..],
+                    None => "Box<dyn Any>",
+                },
+            };
+            return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
+                msg.to_string(),
+            ))));
+        }
+    };
+
+    task.status = Status::Succeeded;
+    if let Some(Details::Export { indexes, .. }) = task.details.as_mut() {
+        *indexes = stats;
+    }
+    Ok((vec![task], ProcessBatchInfo::default()))
+}
 Batch::UpgradeDatabase { mut tasks } => {
 let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
 unreachable!();
@@ -708,9 +755,11 @@ impl IndexScheduler {
 from.1,
 from.2
 );
-match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+let ret = catch_unwind(std::panic::AssertUnwindSafe(|| {
 self.process_rollback(from, progress)
-})) {
+}));
+
+match ret {
 Ok(Ok(())) => {}
 Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))),
 Err(e) => {
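Both the export arm and the rollback path funnel panics through `catch_unwind` and then try to recover a readable message, since in practice a panic payload is either a `&'static str` or a `String`. The extraction on its own; the "boom" payload is an example:

    use std::panic::{catch_unwind, AssertUnwindSafe};

    fn main() {
        let ret = catch_unwind(AssertUnwindSafe(|| panic!("boom: {}", 42)));
        if let Err(e) = ret {
            // `panic!` with formatting produces a `String`; a bare literal
            // produces a `&'static str`; anything else stays opaque.
            let msg = match e.downcast_ref::<&'static str>() {
                Some(s) => *s,
                None => match e.downcast_ref::<String>() {
                    Some(s) => &s[..],
                    None => "Box<dyn Any>",
                },
            };
            println!("recovered panic message: {msg}");
        }
    }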
@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
 
 use dump::IndexMetadata;
 use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
 use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
 use meilisearch_types::milli::{self};
@@ -43,7 +44,16 @@ impl IndexScheduler {
 
 let rtxn = self.env.read_txn()?;
 
-// 2. dump the tasks
+// 2. dump the chat completion settings
+// TODO should I skip the export if the chat completion has been disabled?
+progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings);
+let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?;
+for result in self.chat_settings.iter(&rtxn)? {
+    let (name, chat_settings) = result?;
+    dump_chat_completion_settings.push_settings(name, &chat_settings)?;
+}
+
+// 3. dump the tasks
 progress.update_progress(DumpCreationProgress::DumpTheTasks);
 let mut dump_tasks = dump.create_tasks_queue()?;
 
@@ -81,7 +91,7 @@ impl IndexScheduler {
 
 let mut dump_content_file = dump_tasks.push_task(&t.into())?;
 
-// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
+// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
 if let Some(content_file) = content_file {
 if self.scheduler.must_stop_processing.get() {
 return Err(Error::AbortedTask);
@@ -105,7 +115,7 @@ impl IndexScheduler {
 }
 dump_tasks.flush()?;
 
-// 3. dump the batches
+// 4. dump the batches
 progress.update_progress(DumpCreationProgress::DumpTheBatches);
 let mut dump_batches = dump.create_batches_queue()?;
 
@@ -138,7 +148,7 @@ impl IndexScheduler {
 }
 dump_batches.flush()?;
 
-// 4. Dump the indexes
+// 5. Dump the indexes
 progress.update_progress(DumpCreationProgress::DumpTheIndexes);
 let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
 let mut count = 0;
@@ -165,9 +175,6 @@ impl IndexScheduler {
 
 let fields_ids_map = index.fields_ids_map(&rtxn)?;
 let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-let embedding_configs = index
-    .embedding_configs(&rtxn)
-    .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
 
 let nb_documents = index
 .number_of_documents(&rtxn)
@@ -178,7 +185,7 @@ impl IndexScheduler {
 let documents = index
 .all_documents(&rtxn)
 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
-// 4.1. Dump the documents
+// 5.1. Dump the documents
 for ret in documents {
 if self.scheduler.must_stop_processing.get() {
 return Err(Error::AbortedTask);
@@ -221,16 +228,21 @@ impl IndexScheduler {
 return Err(Error::from_milli(user_err, Some(uid.to_string())));
 };
 
-for (embedder_name, embeddings) in embeddings {
-let user_provided = embedding_configs
-    .iter()
-    .find(|conf| conf.name == embedder_name)
-    .is_some_and(|conf| conf.user_provided.contains(id));
+for (
+    embedder_name,
+    EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
+) in embeddings
+{
 let embeddings = ExplicitVectors {
 embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
 embeddings,
 )),
-regenerate: !user_provided,
+regenerate: regenerate &&
+    // Meilisearch does not handle well dumps with fragments, because as the fragments
+    // are marked as user-provided,
+    // all embeddings would be regenerated on any settings change or document update.
+    // To prevent this, we mark embeddings as non-regenerate in this case.
+    !has_fragments,
 };
 vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
 }
@@ -240,7 +252,7 @@ impl IndexScheduler {
 atomic.fetch_add(1, Ordering::Relaxed);
 }
 
-// 4.2. Dump the settings
+// 5.2. Dump the settings
 let settings = meilisearch_types::settings::settings(
 index,
 &rtxn,
@@ -251,7 +263,7 @@ impl IndexScheduler {
 Ok(())
 })?;
 
-// 5. Dump experimental feature settings
+// 6. Dump experimental feature settings
 progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
 let features = self.features().runtime_features();
 dump.create_experimental_features(features)?;
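Both the dump path above and the export path below serialize embeddings into the reserved `_vectors` field, downgrading `regenerate` whenever fragments are present. A sketch of roughly the resulting document shape, built with `serde_json`; the field values are borrowed from the snapshot files further down, and the nested object layout follows `ExplicitVectors`:

    use serde_json::json;

    fn main() {
        let regenerate = true;
        let has_fragments = true;

        // Fragment-backed embeddings are exported as non-regenerate so a later
        // import does not recompute them on every settings or document change.
        let doc = json!({
            "doggo": "kefir",
            "_vectors": {
                "noise": {
                    "embeddings": [[0.1, 0.2, 0.3]],
                    "regenerate": regenerate && !has_fragments,
                }
            }
        });
        println!("{}", serde_json::to_string_pretty(&doc).unwrap());
    }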
crates/index-scheduler/src/scheduler/process_export.rs (new file, 377 lines)
@@ -0,0 +1,377 @@
+use std::collections::BTreeMap;
+use std::io::{self, Write as _};
+use std::sync::atomic;
+use std::time::Duration;
+
+use backoff::ExponentialBackoff;
+use byte_unit::Byte;
+use flate2::write::GzEncoder;
+use flate2::Compression;
+use meilisearch_types::index_uid_pattern::IndexUidPattern;
+use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
+use meilisearch_types::milli::progress::{Progress, VariableNameStep};
+use meilisearch_types::milli::update::{request_threads, Setting};
+use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
+use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
+use meilisearch_types::settings::{self, SecretPolicy};
+use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
+use serde::Deserialize;
+use ureq::{json, Response};
+
+use super::MustStopProcessing;
+use crate::processing::AtomicDocumentStep;
+use crate::{Error, IndexScheduler, Result};
+
+impl IndexScheduler {
+    pub(super) fn process_export(
+        &self,
+        base_url: &str,
+        api_key: Option<&str>,
+        payload_size: Option<&Byte>,
+        indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
+        progress: Progress,
+    ) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> {
+        #[cfg(test)]
+        self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?;
+
+        let indexes: Vec<_> = self
+            .index_names()?
+            .into_iter()
+            .flat_map(|uid| {
+                indexes
+                    .iter()
+                    .find(|(pattern, _)| pattern.matches_str(&uid))
+                    .map(|(pattern, settings)| (pattern, uid, settings))
+            })
+            .collect();
+
+        let mut output = BTreeMap::new();
+        let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
+        let must_stop_processing = self.scheduler.must_stop_processing.clone();
+        for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
+            if must_stop_processing.get() {
+                return Err(Error::AbortedTask);
+            }
+
+            progress.update_progress(VariableNameStep::<ExportIndex>::new(
+                format!("Exporting index `{uid}`"),
+                i as u32,
+                indexes.len() as u32,
+            ));
+
+            let ExportIndexSettings { filter, override_settings } = export_settings;
+            let index = self.index(uid)?;
+            let index_rtxn = index.read_txn()?;
+            let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
+
+            // First, check if the index already exists
+            let url = format!("{base_url}/indexes/{uid}");
+            let response = retry(&must_stop_processing, || {
+                let mut request = agent.get(&url);
+                if let Some(bearer) = &bearer {
+                    request = request.set("Authorization", bearer);
+                }
+
+                request.send_bytes(Default::default()).map_err(into_backoff_error)
+            });
+            let index_exists = match response {
+                Ok(response) => response.status() == 200,
+                Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
+                    false
+                }
+                Err(e) => return Err(e),
+            };
+
+            let primary_key = index
+                .primary_key(&index_rtxn)
+                .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
+
+            // Create the index
+            if !index_exists {
+                let url = format!("{base_url}/indexes");
+                retry(&must_stop_processing, || {
+                    let mut request = agent.post(&url);
+                    if let Some(bearer) = &bearer {
+                        request = request.set("Authorization", bearer);
+                    }
+                    let index_param = json!({ "uid": uid, "primaryKey": primary_key });
+                    request.send_json(&index_param).map_err(into_backoff_error)
+                })?;
+            }
+
+            // Patch the index primary key
+            if index_exists && *override_settings {
+                let url = format!("{base_url}/indexes/{uid}");
+                retry(&must_stop_processing, || {
+                    let mut request = agent.patch(&url);
+                    if let Some(bearer) = &bearer {
+                        request = request.set("Authorization", bearer);
+                    }
+                    let index_param = json!({ "primaryKey": primary_key });
+                    request.send_json(&index_param).map_err(into_backoff_error)
+                })?;
+            }
+
+            // Send the index settings
+            if !index_exists || *override_settings {
+                let mut settings =
+                    settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
+                        .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+                // Remove the experimental chat setting if not enabled
+                if self.features().check_chat_completions("exporting chat settings").is_err() {
+                    settings.chat = Setting::NotSet;
+                }
+                // Retry logic for sending settings
+                let url = format!("{base_url}/indexes/{uid}/settings");
+                retry(&must_stop_processing, || {
+                    let mut request = agent.patch(&url);
+                    if let Some(bearer) = bearer.as_ref() {
+                        request = request.set("Authorization", bearer);
+                    }
+                    request.send_json(settings.clone()).map_err(into_backoff_error)
+                })?;
+            }
+
+            let filter = filter
+                .as_ref()
+                .map(Filter::from_json)
+                .transpose()
+                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
+                .flatten();
+
+            let filter_universe = filter
+                .map(|f| f.evaluate(&index_rtxn, &index))
+                .transpose()
+                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+            let whole_universe = index
+                .documents_ids(&index_rtxn)
+                .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
+            let universe = filter_universe.unwrap_or(whole_universe);
+
+            let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
+            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+            // We don't need to keep this one alive as we will
+            // spawn many threads to process the documents
+            drop(index_rtxn);
+
+            let total_documents = universe.len() as u32;
+            let (step, progress_step) = AtomicDocumentStep::new(total_documents);
+            progress.update_progress(progress_step);
+
+            output.insert(
+                IndexUidPattern::new_unchecked(uid.clone()),
+                DetailsExportIndexSettings {
+                    settings: (*export_settings).clone(),
+                    matched_documents: Some(total_documents as u64),
+                },
+            );
+
+            let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
+            let documents_url = format!("{base_url}/indexes/{uid}/documents");
+
+            let results = request_threads()
+                .broadcast(|ctx| {
+                    let index_rtxn = index
+                        .read_txn()
+                        .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
+
+                    let mut buffer = Vec::new();
+                    let mut tmp_buffer = Vec::new();
+                    let mut compressed_buffer = Vec::new();
+                    for (i, docid) in universe.iter().enumerate() {
+                        if i % ctx.num_threads() != ctx.index() {
+                            continue;
+                        }
+
+                        let document = index
+                            .document(&index_rtxn, docid)
+                            .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                        let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
+                            .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                        // TODO definitely factorize this code
+                        'inject_vectors: {
+                            let embeddings = index
+                                .embeddings(&index_rtxn, docid)
+                                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                            if embeddings.is_empty() {
+                                break 'inject_vectors;
+                            }
+
+                            let vectors = document
+                                .entry(RESERVED_VECTORS_FIELD_NAME)
+                                .or_insert(serde_json::Value::Object(Default::default()));
+
+                            let serde_json::Value::Object(vectors) = vectors else {
+                                return Err(Error::from_milli(
+                                    milli::Error::UserError(
+                                        milli::UserError::InvalidVectorsMapType {
+                                            document_id: {
+                                                if let Ok(Some(Ok(index))) = index
+                                                    .external_id_of(
+                                                        &index_rtxn,
+                                                        std::iter::once(docid),
+                                                    )
+                                                    .map(|it| it.into_iter().next())
+                                                {
+                                                    index
+                                                } else {
+                                                    format!("internal docid={docid}")
+                                                }
+                                            },
+                                            value: vectors.clone(),
+                                        },
+                                    ),
+                                    Some(uid.to_string()),
+                                ));
+                            };
+
+                            for (
+                                embedder_name,
+                                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
+                            ) in embeddings
+                            {
+                                let embeddings = ExplicitVectors {
+                                    embeddings: Some(
+                                        VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
+                                    ),
+                                    regenerate: regenerate &&
+                                        // Meilisearch does not handle well dumps with fragments, because as the fragments
+                                        // are marked as user-provided,
+                                        // all embeddings would be regenerated on any settings change or document update.
+                                        // To prevent this, we mark embeddings as non-regenerate in this case.
+                                        !has_fragments,
+                                };
+                                vectors.insert(
+                                    embedder_name,
+                                    serde_json::to_value(embeddings).unwrap(),
+                                );
+                            }
+                        }
+
+                        tmp_buffer.clear();
+                        serde_json::to_writer(&mut tmp_buffer, &document)
+                            .map_err(milli::InternalError::from)
+                            .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
+
+                        // Make sure we put at least one document in the buffer even
+                        // though we might go above the buffer limit before sending
+                        if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
+                            // We compress the documents before sending them
+                            let mut encoder =
+                                GzEncoder::new(&mut compressed_buffer, Compression::default());
+                            encoder
+                                .write_all(&buffer)
+                                .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
+                            encoder
+                                .finish()
+                                .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
+
+                            retry(&must_stop_processing, || {
+                                let mut request = agent.post(&documents_url);
+                                request = request.set("Content-Type", "application/x-ndjson");
+                                request = request.set("Content-Encoding", "gzip");
+                                if let Some(bearer) = &bearer {
+                                    request = request.set("Authorization", bearer);
+                                }
+                                request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
+                            })?;
+                            buffer.clear();
+                            compressed_buffer.clear();
+                        }
+                        buffer.extend_from_slice(&tmp_buffer);
+
+                        if i > 0 && i % 100 == 0 {
+                            step.fetch_add(100, atomic::Ordering::Relaxed);
+                        }
+                    }
+
+                    retry(&must_stop_processing, || {
+                        let mut request = agent.post(&documents_url);
+                        request = request.set("Content-Type", "application/x-ndjson");
+                        if let Some(bearer) = &bearer {
+                            request = request.set("Authorization", bearer);
+                        }
+                        request.send_bytes(&buffer).map_err(into_backoff_error)
+                    })?;
+
+                    Ok(())
+                })
+                .map_err(|e| {
+                    Error::from_milli(
+                        milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
+                        Some(uid.to_string()),
+                    )
+                })?;
+            for result in results {
+                result?;
+            }
+
+            step.store(total_documents, atomic::Ordering::Relaxed);
+        }
+
+        Ok(output)
+    }
+}
+
+fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
+where
+    F: Fn() -> Result<ureq::Response, backoff::Error<ureq::Error>>,
+{
+    match backoff::retry(ExponentialBackoff::default(), || {
+        if must_stop_processing.get() {
+            return Err(backoff::Error::Permanent(ureq::Error::Status(
+                u16::MAX,
+                // 444: Connection Closed Without Response
+                Response::new(444, "Abort", "Aborted task").unwrap(),
+            )));
+        }
+        send_request()
+    }) {
+        Ok(response) => Ok(response),
+        Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)),
+        Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)),
+    }
+}
+
+fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> {
+    match err {
+        // Those code status must trigger an automatic retry
+        // <https://www.restapitutorial.com/advanced/responses/retries>
+        ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => {
+            backoff::Error::Transient { err, retry_after: None }
+        }
+        ureq::Error::Status(_, _) => backoff::Error::Permanent(err),
+        ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None },
+    }
+}
+
+/// Converts a `ureq::Error` into an `Error`.
+fn ureq_error_into_error(error: ureq::Error) -> Error {
+    #[derive(Deserialize)]
+    struct MeiliError {
+        message: String,
+        code: String,
+        r#type: String,
+        link: String,
+    }
+
+    match error {
+        // This is a workaround to handle task abortion - the error propagation path
+        // makes it difficult to cleanly surface the abortion at this level.
+        ureq::Error::Status(u16::MAX, _) => Error::AbortedTask,
+        ureq::Error::Status(_, response) => match response.into_json() {
+            Ok(MeiliError { message, code, r#type, link }) => {
+                Error::FromRemoteWhenExporting { message, code, r#type, link }
+            }
+            Err(e) => e.into(),
+        },
+        ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
+    }
+}
+
+enum ExportIndex {}
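The document loop in `process_export` accumulates NDJSON into a buffer and gzips each batch before POSTing it, always keeping at least one document per batch. The compression and flushing logic in isolation, with `flate2`; the payload, the 64-byte limit, and the `send` helper are illustrative stand-ins for the real HTTP POST:

    use std::io::Write as _;

    use flate2::write::GzEncoder;
    use flate2::Compression;

    fn main() -> std::io::Result<()> {
        let limit = 64; // bytes per batch; the export defaults to 20 MiB
        let docs = [r#"{"id":1}"#, r#"{"id":2}"#, r#"{"id":3}"#];

        let mut buffer = Vec::new();
        for doc in docs {
            // Flush the batch before it would grow past the limit,
            // but always keep at least one document per batch.
            if !buffer.is_empty() && buffer.len() + doc.len() + 1 > limit {
                send(&buffer)?;
                buffer.clear();
            }
            buffer.extend_from_slice(doc.as_bytes());
            buffer.push(b'\n');
        }
        send(&buffer)
    }

    fn send(ndjson: &[u8]) -> std::io::Result<()> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(ndjson)?;
        let compressed = encoder.finish()?;
        // A real exporter would POST `compressed` with `Content-Encoding: gzip`.
        println!("batch: {} raw -> {} compressed bytes", ndjson.len(), compressed.len());
        Ok(())
    }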
@ -1,8 +1,10 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use bumpalo::collections::CollectIn;
|
use bumpalo::collections::CollectIn;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use meilisearch_types::heed::RwTxn;
|
use meilisearch_types::heed::RwTxn;
|
||||||
use meilisearch_types::milli::documents::PrimaryKey;
|
use meilisearch_types::milli::documents::PrimaryKey;
|
||||||
use meilisearch_types::milli::progress::Progress;
|
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||||
use meilisearch_types::milli::update::DocumentAdditionResult;
|
use meilisearch_types::milli::update::DocumentAdditionResult;
|
||||||
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
|
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
|
||||||
@ -24,7 +26,7 @@ impl IndexScheduler {
|
|||||||
/// The list of processed tasks.
|
/// The list of processed tasks.
|
||||||
#[tracing::instrument(
|
#[tracing::instrument(
|
||||||
level = "trace",
|
level = "trace",
|
||||||
skip(self, index_wtxn, index, progress),
|
skip(self, index_wtxn, index, progress, embedder_stats),
|
||||||
target = "indexing::scheduler"
|
target = "indexing::scheduler"
|
||||||
)]
|
)]
|
||||||
pub(crate) fn apply_index_operation<'i>(
|
pub(crate) fn apply_index_operation<'i>(
|
||||||
@ -33,6 +35,7 @@ impl IndexScheduler {
|
|||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
progress: &Progress,
|
progress: &Progress,
|
||||||
|
embedder_stats: Arc<EmbedderStats>,
|
||||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
let started_processing_at = std::time::Instant::now();
|
let started_processing_at = std::time::Instant::now();
|
||||||
@ -86,8 +89,9 @@ impl IndexScheduler {
|
|||||||
let mut content_files_iter = content_files.iter();
|
 let mut content_files_iter = content_files.iter();
 let mut indexer = indexer::DocumentOperation::new();
 let embedders = index
+    .embedding_configs()
     .embedding_configs(index_wtxn)
-    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
+    .map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
 let embedders = self.embedders(index_uid.clone(), embedders)?;
 for operation in operations {
     match operation {
@@ -177,6 +181,7 @@ impl IndexScheduler {
     embedders,
     &|| must_stop_processing.get(),
     progress,
+    &embedder_stats,
 )
 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
 );
@@ -270,8 +275,9 @@ impl IndexScheduler {
 })
 .unwrap()?;
 let embedders = index
+    .embedding_configs()
     .embedding_configs(index_wtxn)
-    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+    .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
 let embedders = self.embedders(index_uid.clone(), embedders)?;

 progress.update_progress(DocumentEditionProgress::Indexing);
@@ -288,6 +294,7 @@ impl IndexScheduler {
     embedders,
     &|| must_stop_processing.get(),
     progress,
+    &embedder_stats,
 )
 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
 );
@@ -418,8 +425,9 @@ impl IndexScheduler {
 indexer.delete_documents_by_docids(to_delete);
 let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
 let embedders = index
+    .embedding_configs()
     .embedding_configs(index_wtxn)
-    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+    .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
 let embedders = self.embedders(index_uid.clone(), embedders)?;

 progress.update_progress(DocumentDeletionProgress::Indexing);
@@ -436,6 +444,7 @@ impl IndexScheduler {
     embedders,
     &|| must_stop_processing.get(),
     progress,
+    &embedder_stats,
 )
 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
 );
@@ -468,14 +477,11 @@ impl IndexScheduler {
 }

 progress.update_progress(SettingsProgress::ApplyTheSettings);
-builder
-    .execute(
-        |indexing_step| tracing::debug!(update = ?indexing_step),
-        || must_stop_processing.get(),
-    )
+let congestion = builder
+    .execute(&|| must_stop_processing.get(), progress, embedder_stats)
     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-Ok((tasks, None))
+Ok((tasks, congestion))
 }
 IndexOperation::DocumentClearAndSetting {
 index_uid,
@@ -491,6 +497,7 @@ impl IndexScheduler {
 tasks: cleared_tasks,
 },
 progress,
+embedder_stats.clone(),
 )?;

 let (settings_tasks, _congestion) = self.apply_index_operation(
@@ -498,6 +505,7 @@ impl IndexScheduler {
 index,
 IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
 progress,
+embedder_stats,
 )?;

 let mut tasks = settings_tasks;
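Net effect of the scheduler hunks above: `index.embedding_configs()` now returns an accessor object, and the actual read happens in that accessor's own `embedding_configs(txn)` method, which is why each call site gains a chained `.embedding_configs()` and converts the storage-level error with `e.into()` before wrapping it in the scheduler error. Independently, the indexing and settings paths now thread an embedder-stats handle (`&embedder_stats` / `embedder_stats.clone()`) through every `execute` call, and the settings path returns the indexing `congestion` instead of `None`. A minimal sketch of the new read shape, with every type a stand-in rather than the real milli definition:

// Sketch only: `Wtxn`, `StorageError`, `MilliError`, `EmbeddingConfigsAccessor`
// and `Index` are illustrative stand-ins; the real signatures live in milli.
struct Wtxn; // stand-in for the write transaction
struct StorageError; // stand-in for the storage-layer error
#[derive(Debug)]
struct MilliError; // stand-in for milli::Error

impl From<StorageError> for MilliError {
    fn from(_: StorageError) -> Self {
        MilliError
    }
}

struct EmbeddingConfigsAccessor;

impl EmbeddingConfigsAccessor {
    // The read happens here, inside the transaction, and can fail at the
    // storage layer, so callers must convert the error before wrapping it.
    fn embedding_configs(&self, _txn: &Wtxn) -> Result<Vec<String>, StorageError> {
        Ok(vec!["default".into()])
    }
}

struct Index;

impl Index {
    // New shape: returns an accessor instead of the configs themselves.
    fn embedding_configs(&self) -> EmbeddingConfigsAccessor {
        EmbeddingConfigsAccessor
    }
}

fn main() -> Result<(), MilliError> {
    let (index, wtxn) = (Index, Wtxn);
    // Mirrors the new call site: chained accessor plus `.into()` on the error.
    let embedders = index
        .embedding_configs()
        .embedding_configs(&wtxn)
        .map_err(|e| MilliError::from(e))?;
    println!("{} embedder(s)", embedders.len());
    Ok(())
}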
@@ -0,0 +1,17 @@
+---
+source: crates/index-scheduler/src/scheduler/test.rs
+expression: config.embedder_options
+---
+{
+  "Rest": {
+    "api_key": "My super secret",
+    "distribution": null,
+    "dimensions": 4,
+    "url": "http://localhost:7777",
+    "request": "{{text}}",
+    "search_fragments": {},
+    "indexing_fragments": {},
+    "response": "{{embedding}}",
+    "headers": {}
+  }
+}
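The hunk above and the three that follow add new insta snapshot files: the `source:`/`expression:` lines are insta's metadata header, and the body is the JSON-serialized value under test. A minimal sketch of the kind of assertion that produces such a file; the `EmbedderOptions` struct here is a hypothetical stand-in, and `insta` (with its `json` feature) plus `serde` are assumed dev-dependencies:

// Sketch only: producing an insta JSON snapshot like the ones in this diff.
use serde::Serialize;

#[derive(Serialize)]
struct EmbedderOptions {
    api_key: String,
    dimensions: usize,
    url: String,
}

#[test]
fn embedder_options_snapshot() {
    let config = EmbedderOptions {
        api_key: "My super secret".into(),
        dimensions: 4,
        url: "http://localhost:7777".into(),
    };
    // On first run this writes a `.snap` file with a `source:`/`expression:`
    // header like the ones shown here; later runs diff against it.
    insta::assert_json_snapshot!(config);
}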
@@ -0,0 +1,12 @@
+---
+source: crates/index-scheduler/src/scheduler/test_embedders.rs
+expression: simple_hf_config.embedder_options
+---
+{
+  "HuggingFace": {
+    "model": "sentence-transformers/all-MiniLM-L6-v2",
+    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
+    "distribution": null,
+    "pooling": "useModel"
+  }
+}
@@ -0,0 +1,15 @@
+---
+source: crates/index-scheduler/src/scheduler/test_embedders.rs
+expression: doc
+---
+{
+  "doggo": "Intel",
+  "breed": "beagle",
+  "_vectors": {
+    "noise": [
+      0.1,
+      0.2,
+      0.3
+    ]
+  }
+}
@@ -0,0 +1,15 @@
+---
+source: crates/index-scheduler/src/scheduler/test_embedders.rs
+expression: doc
+---
+{
+  "doggo": "kefir",
+  "breed": "patou",
+  "_vectors": {
+    "noise": [
+      0.1,
+      0.2,
+      0.3
+    ]
+  }
+}
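The two document snapshots above pin down the `_vectors` shape these tests rely on: a user-provided embedding stored under the embedder's name (`noise` here) as a plain array of floats. A quick illustration with serde_json (assumed as a dependency):

// Sketch only: building a document with a user-provided `_vectors` entry,
// matching the shape asserted by the two snapshots above.
use serde_json::json;

fn main() {
    let doc = json!({
        "doggo": "kefir",
        "breed": "patou",
        "_vectors": {
            "noise": [0.1, 0.2, 0.3]
        }
    });
    // The embedder name keys the vector; here it is a 3-dimensional embedding.
    assert_eq!(doc["_vectors"]["noise"][2].as_f64(), Some(0.3));
    println!("{doc:#}");
}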
@@ -1,12 +1,17 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-expression: simple_hf_config.embedder_options
+expression: fakerest_config.embedder_options
 ---
 {
-  "HuggingFace": {
-    "model": "sentence-transformers/all-MiniLM-L6-v2",
-    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
+  "Rest": {
+    "api_key": "My super secret",
     "distribution": null,
-    "pooling": "useModel"
+    "dimensions": 384,
+    "url": "http://localhost:7777",
+    "request": "{{text}}",
+    "search_fragments": {},
+    "indexing_fragments": {},
+    "response": "{{embedding}}",
+    "headers": {}
   }
 }
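The snapshot above changed in two ways: the asserted expression moved from `simple_hf_config` to `fakerest_config`, and the serialized REST embedder options gained empty `search_fragments` and `indexing_fragments` maps. A rough sketch of what the updated snapshot implies about the options struct; field names and types below are guesses read off the JSON, not the real milli definitions, and serde_json is assumed as a dependency:

// Sketch only: the shape the updated snapshot asserts, with the two new
// per-purpose fragment maps (empty by default, as in the snapshot).
use std::collections::BTreeMap;

#[derive(Debug, Default)]
struct RestEmbedderOptions {
    api_key: Option<String>,
    dimensions: Option<usize>,
    url: String,
    request: serde_json::Value,
    response: serde_json::Value,
    // New in this diff: named request fragments for indexing and for search.
    indexing_fragments: BTreeMap<String, serde_json::Value>,
    search_fragments: BTreeMap<String, serde_json::Value>,
    headers: BTreeMap<String, String>,
}

fn main() {
    let opts = RestEmbedderOptions {
        api_key: Some("My super secret".into()),
        dimensions: Some(384),
        url: "http://localhost:7777".into(),
        request: serde_json::json!("{{text}}"),
        response: serde_json::json!("{{embedding}}"),
        ..Default::default() // fragments and headers stay empty, as snapshotted
    };
    println!("{opts:#?}");
}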
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
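Every task-dump hunk from here down changes in exactly one way: the `EmbeddingSettings` debug output now lists `indexing_fragments: NotSet, search_fragments: NotSet` between `url` and `request`. `Set`/`NotSet` come from Meilisearch's three-state `Setting` wrapper, so two freshly added, never-configured fields print as `NotSet` everywhere. A toy reproduction of that behavior; the struct below is a cut-down stand-in, not the real `EmbeddingSettings`, and serde_json is assumed as a dependency:

// Sketch only: why two brand-new settings fields show up as `NotSet` in all
// the dumps below. `Setting` mirrors the meilisearch_types three-state enum.
use std::collections::BTreeMap;

#[derive(Debug, Default)]
enum Setting<T> {
    Set(T),
    Reset,
    #[default]
    NotSet,
}

#[derive(Debug, Default)]
struct EmbeddingSettingsSketch {
    url: Setting<String>,
    indexing_fragments: Setting<BTreeMap<String, serde_json::Value>>,
    search_fragments: Setting<BTreeMap<String, serde_json::Value>>,
}

fn main() {
    // A settings payload that never mentions the new fields leaves them at
    // their default, which is exactly what the snapshots print.
    let s = EmbeddingSettingsSketch {
        url: Setting::Set("http://localhost:7777".into()),
        ..Default::default()
    };
    println!("{s:?}"); // url: Set(...), both fragment fields: NotSet
}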
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
 [timestamp] [4,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:
@@ -37,7 +37,7 @@ catto [1,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 ----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 16, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
 [timestamp] [0,]
 ----------------------------------------------------------------------
 ### All Batches:
-0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
+0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.16.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
 ----------------------------------------------------------------------
 ### Batch to tasks mapping:
 0 [0,]
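
An aside on these snapshot churns: every `UpgradeDatabase` snapshot above moves `to:` from (1, 15, 2) to (1, 16, 0) because the target of an upgrade is the binary's own release version, so a version bump rewrites all of them at once. A minimal sketch of how such a triple can be derived at compile time from Cargo metadata (an assumption for illustration, not necessarily how meilisearch computes it):

```rust
// Parse the crate version "MAJOR.MINOR.PATCH" into a (u32, u32, u32) triple
// at compile time via the CARGO_PKG_VERSION environment variable.
fn version_triple() -> (u32, u32, u32) {
    let mut parts = env!("CARGO_PKG_VERSION").split('.').map(|p| p.parse::<u32>().unwrap());
    (parts.next().unwrap(), parts.next().unwrap(), parts.next().unwrap())
}

fn main() {
    // Would print e.g. "upgrading to (1, 16, 0)" when built as v1.16.0.
    println!("upgrading to {:?}", version_triple());
}
```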
@@ -3,11 +3,11 @@ use std::collections::BTreeMap;
 use big_s::S;
 use meili_snap::{json_string, snapshot};
 use meilisearch_auth::AuthFilter;
-use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexDocumentsMethod::*;
 use meilisearch_types::milli::{self};
 use meilisearch_types::settings::SettingEmbeddingSettings;
 use meilisearch_types::tasks::{IndexSwap, KindWithContent};
+use milli::vector::db::IndexEmbeddingConfig;
 use roaring::RoaringBitmap;
 
 use crate::insta_snapshot::snapshot_index_scheduler;
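
The import move above is the visible edge of a type relocation: `IndexEmbeddingConfig` now lives in `milli::vector::db`, and judging from the destructuring patterns throughout these tests it dropped the inline `user_provided` bitmap in favour of a `fragments` field. A hedged, self-contained sketch of the inferred shape (the stub types stand in for the real milli definitions, which may carry more detail):

```rust
// Stand-ins for the real milli types, for illustration only.
pub struct EmbeddingConfig;                 // embedder options, prompt, quantization
#[derive(Debug, Default)]
pub struct FragmentConfigs(pub Vec<String>); // fragment definitions, empty in these tests

// Inferred shape of milli::vector::db::IndexEmbeddingConfig: per-document
// status (user-provided / skip-regenerate bitmaps) moved behind a separate
// embedder-info lookup instead of living on this record.
pub struct IndexEmbeddingConfig {
    pub name: String,
    pub config: EmbeddingConfig,
    pub fragments: FragmentConfigs,
}

fn main() {
    let cfg = IndexEmbeddingConfig {
        name: "default".into(),
        config: EmbeddingConfig,
        fragments: FragmentConfigs::default(),
    };
    // The same destructuring style the tests in this diff use.
    let IndexEmbeddingConfig { name, fragments, .. } = &cfg;
    assert_eq!(name, "default");
    assert!(fragments.0.is_empty());
}
```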
@@ -690,11 +690,20 @@ fn test_settings_update() {
 let index = index_scheduler.index("doggos").unwrap();
 let rtxn = index.read_txn().unwrap();
 
-let configs = index.embedding_configs(&rtxn).unwrap();
-let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
+let embedders = index.embedding_configs();
+let configs = embedders.embedding_configs(&rtxn).unwrap();
+let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"0");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
 insta::assert_snapshot!(name, @"default");
-insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
 insta::assert_json_snapshot!(config.embedder_options);
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 }
 
 #[test]
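
The hunk above shows the read path splitting in two: `index.embedding_configs()` now returns a handle, and per-embedder runtime state (a numeric id plus the two docid bitmaps that used to sit inline) is fetched separately through `embedder_info`. A minimal runnable mimic of that pattern, with names inferred from the test diff rather than copied from the real milli signatures:

```rust
use std::collections::HashMap;

struct RoTxn; // stand-in for a heed read transaction

#[derive(Clone, Default)]
struct EmbeddingStatus {
    user_provided: Vec<u32>,
    skip_regenerate: Vec<u32>,
}

impl EmbeddingStatus {
    // Documents whose vectors were supplied by the user.
    fn user_provided_docids(&self) -> &[u32] { &self.user_provided }
    // Documents whose vectors must not be regenerated on re-index.
    fn skip_regenerate_docids(&self) -> &[u32] { &self.skip_regenerate }
}

#[derive(Clone, Default)]
struct EmbedderInfo {
    embedder_id: u8,
    embedding_status: EmbeddingStatus,
}

#[derive(Default)]
struct EmbeddingConfigsHandle {
    infos: HashMap<String, EmbedderInfo>,
}

impl EmbeddingConfigsHandle {
    // Mirrors the `embedder_info(&rtxn, name)` lookup used in the tests.
    fn embedder_info(&self, _rtxn: &RoTxn, name: &str) -> Option<EmbedderInfo> {
        self.infos.get(name).cloned()
    }
}

fn main() {
    let mut handle = EmbeddingConfigsHandle::default();
    handle.infos.insert("default".into(), EmbedderInfo::default());
    let info = handle.embedder_info(&RoTxn, "default").unwrap();
    assert_eq!(info.embedder_id, 0);
    assert!(info.embedding_status.user_provided_docids().is_empty());
    assert!(info.embedding_status.skip_regenerate_docids().is_empty());
}
```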
@@ -732,6 +741,7 @@ fn basic_get_stats() {
 "documentDeletion": 0,
 "documentEdition": 0,
 "dumpCreation": 0,
+"export": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -765,6 +775,7 @@ fn basic_get_stats() {
 "documentDeletion": 0,
 "documentEdition": 0,
 "dumpCreation": 0,
+"export": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -805,6 +816,7 @@ fn basic_get_stats() {
 "documentDeletion": 0,
 "documentEdition": 0,
 "dumpCreation": 0,
+"export": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -846,6 +858,7 @@ fn basic_get_stats() {
 "documentDeletion": 0,
 "documentEdition": 0,
 "dumpCreation": 0,
+"export": 0,
 "indexCreation": 3,
 "indexDeletion": 0,
 "indexSwap": 0,
@@ -3,13 +3,15 @@ use std::collections::BTreeMap;
 use big_s::S;
 use insta::assert_json_snapshot;
 use meili_snap::{json_string, snapshot};
-use meilisearch_types::milli::index::IndexEmbeddingConfig;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::update::Setting;
 use meilisearch_types::milli::vector::settings::EmbeddingSettings;
+use meilisearch_types::milli::vector::SearchQuery;
 use meilisearch_types::milli::{self, obkv_to_json};
 use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
 use milli::update::IndexDocumentsMethod::*;
+use milli::vector::db::IndexEmbeddingConfig;
 
 use crate::insta_snapshot::snapshot_index_scheduler;
 use crate::test_utils::read_json;
@@ -85,28 +87,51 @@ fn import_vectors() {
 let index = index_scheduler.index("doggos").unwrap();
 let rtxn = index.read_txn().unwrap();
 
-let configs = index.embedding_configs(&rtxn).unwrap();
+let embedders = index.embedding_configs();
+let configs = embedders.embedding_configs(&rtxn).unwrap();
 // for consistency with the below
 #[allow(clippy::get_first)]
-let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
+let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
 configs.get(0).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"0");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
 insta::assert_snapshot!(name, @"A_fakerest");
-insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 insta::assert_json_snapshot!(fakerest_config.embedder_options);
 let fakerest_name = name.clone();
 
-let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
+let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
 configs.get(1).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"1");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
 insta::assert_snapshot!(name, @"B_small_hf");
-insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 insta::assert_json_snapshot!(simple_hf_config.embedder_options);
 let simple_hf_name = name.clone();
 
 let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
-let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
-let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
-let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
-let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
+let hf_runtime = configs.get(&simple_hf_name).unwrap();
+let hf_embedder = &hf_runtime.embedder;
+let beagle_embed = hf_embedder
+.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
+.unwrap();
+let lab_embed =
+hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
+let patou_embed = hf_embedder
+.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
+.unwrap();
 (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
 };
 
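
Two call-site migrations are visible in the hunk above: the embedder map now yields a runtime object whose `embedder` field holds the actual embedder, and `embed_search` takes a `SearchQuery` enum instead of a bare `&str`, presumably leaving room for non-text (fragment/media) queries. A self-contained mimic of the new signature, with names inferred from the test changes rather than taken from the real milli API:

```rust
// Toy stand-in for milli::vector::SearchQuery: wraps the query so the
// signature can later grow non-text variants without breaking callers.
enum SearchQuery<'a> {
    Text(&'a str),
}

struct Embedder;

impl Embedder {
    // Second parameter mirrors the `None` deadline passed in the tests.
    fn embed_search(&self, query: SearchQuery<'_>, _deadline: Option<std::time::Instant>) -> Vec<f32> {
        match query {
            // A deterministic toy "embedding": one float per input byte.
            SearchQuery::Text(t) => t.bytes().map(|b| b as f32).collect(),
        }
    }
}

fn main() {
    let embedder = Embedder;
    let v = embedder.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None);
    assert!(!v.is_empty());
}
```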
@@ -166,22 +191,38 @@ fn import_vectors() {
 let rtxn = index.read_txn().unwrap();
 
 // Ensure the document have been inserted into the relevant bitamp
-let configs = index.embedding_configs(&rtxn).unwrap();
+let embedders = index.embedding_configs();
+let configs = embedders.embedding_configs(&rtxn).unwrap();
 // for consistency with the below
 #[allow(clippy::get_first)]
-let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
-configs.get(0).unwrap();
+let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"0");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
 insta::assert_snapshot!(name, @"A_fakerest");
-insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 
-let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
+let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"1");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
 insta::assert_snapshot!(name, @"B_small_hf");
-insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 
 let embeddings = index.embeddings(&rtxn, 0).unwrap();
 
-assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
-assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
+assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
+assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
 
 let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
 let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
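
The other recurring change in these hunks is the return shape of `index.embeddings()`: each embedder name now maps to an `EmbeddingsWithMetadata` wrapper instead of a bare list of vectors, so call sites reach through `.embeddings`. A runnable sketch of that access pattern; the field name comes from the diff, and the real struct plausibly carries regenerate/fragment metadata alongside (an assumption here):

```rust
use std::collections::BTreeMap;

// Simplified stand-in for milli's EmbeddingsWithMetadata.
struct EmbeddingsWithMetadata {
    embeddings: Vec<Vec<f32>>, // one vector per fragment/document chunk
}

fn main() {
    let mut per_embedder = BTreeMap::new();
    per_embedder.insert(
        "manual".to_string(),
        EmbeddingsWithMetadata { embeddings: vec![vec![0.0, 0.0, 0.0]] },
    );

    // The destructuring style the updated tests use:
    let EmbeddingsWithMetadata { embeddings, .. } = &per_embedder["manual"];
    assert!(!embeddings.is_empty());
    assert_eq!(embeddings[0].len(), 3);
}
```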
@@ -239,25 +280,41 @@ fn import_vectors() {
 let index = index_scheduler.index("doggos").unwrap();
 let rtxn = index.read_txn().unwrap();
 
+let embedders = index.embedding_configs();
 // Ensure the document have been inserted into the relevant bitamp
-let configs = index.embedding_configs(&rtxn).unwrap();
+let configs = embedders.embedding_configs(&rtxn).unwrap();
 // for consistency with the below
 #[allow(clippy::get_first)]
-let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
-configs.get(0).unwrap();
+let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"0");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
 insta::assert_snapshot!(name, @"A_fakerest");
-insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 
-let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
+let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
+let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"1");
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
 insta::assert_snapshot!(name, @"B_small_hf");
-insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
+insta::assert_debug_snapshot!(fragments, @r###"
+FragmentConfigs(
+[],
+)
+"###);
 
 let embeddings = index.embeddings(&rtxn, 0).unwrap();
 
 // automatically changed to patou because set to regenerate
-assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
+assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
 // remained beagle
-assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
+assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
 
 let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
 let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -399,8 +456,8 @@ fn import_vectors_first_and_embedder_later() {
 .collect::<Vec<_>>();
 // the all the vectors linked to the new specified embedder have been removed
 // Only the unknown embedders stays in the document DB
-snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
-let conf = index.embedding_configs(&rtxn).unwrap();
+snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
+let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
 // even though we specified the vector for the ID 3, it shouldn't be marked
 // as user provided since we explicitely marked it as NOT user provided.
 snapshot!(format!("{conf:#?}"), @r###"
@@ -426,19 +483,28 @@ fn import_vectors_first_and_embedder_later() {
 },
 quantized: None,
 },
-user_provided: RoaringBitmap<[1, 2]>,
+fragments: FragmentConfigs(
+[],
+),
 },
 ]
 "###);
+let info =
+index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
+insta::assert_snapshot!(info.embedder_id, @"0");
+
+insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
+insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");
+
 let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
 let embeddings = index.embeddings(&rtxn, docid).unwrap();
-let embedding = &embeddings["my_doggo_embedder"];
-assert!(!embedding.is_empty(), "{embedding:?}");
+let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
+assert!(!embeddings.is_empty(), "{embeddings:?}");
 
 // the document with the id 3 should keep its original embedding
 let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
 let embeddings = index.embeddings(&rtxn, docid).unwrap();
-let embeddings = &embeddings["my_doggo_embedder"];
+let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
 
 snapshot!(embeddings.len(), @"1");
 assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -493,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
 "###);
 
 let embeddings = index.embeddings(&rtxn, docid).unwrap();
-let embedding = &embeddings["my_doggo_embedder"];
+let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
 
 assert!(!embedding.is_empty());
 assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -501,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
 // the document with the id 4 should generate an embedding
 let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
 let embeddings = index.embeddings(&rtxn, docid).unwrap();
-let embedding = &embeddings["my_doggo_embedder"];
+let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
 
 assert!(!embedding.is_empty());
 }
@@ -603,33 +669,35 @@ fn delete_document_containing_vector() {
 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
 .collect::<Vec<_>>();
 snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
-let conf = index.embedding_configs(&rtxn).unwrap();
+let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
 snapshot!(format!("{conf:#?}"), @r###"
 [
 IndexEmbeddingConfig {
 name: "manual",
 config: EmbeddingConfig {
 embedder_options: UserProvided(
 EmbedderOptions {
 dimensions: 3,
 distribution: None,
-},
-),
-prompt: PromptData {
-template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
-max_bytes: Some(
-400,
-),
 },
-quantized: None,
+),
+prompt: PromptData {
+template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
+max_bytes: Some(
+400,
+),
+},
+quantized: None,
 },
-user_provided: RoaringBitmap<[0]>,
+fragments: FragmentConfigs(
+[],
+),
 },
-]
-"###);
+]
+"###);
 let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
 let embeddings = index.embeddings(&rtxn, docid).unwrap();
-let embedding = &embeddings["manual"];
+let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
 assert!(!embedding.is_empty(), "{embedding:?}");
 
 index_scheduler
@@ -647,30 +715,32 @@ fn delete_document_containing_vector() {
 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
 .collect::<Vec<_>>();
 snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
-let conf = index.embedding_configs(&rtxn).unwrap();
+let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
 snapshot!(format!("{conf:#?}"), @r###"
 [
 IndexEmbeddingConfig {
 name: "manual",
 config: EmbeddingConfig {
 embedder_options: UserProvided(
 EmbedderOptions {
 dimensions: 3,
 distribution: None,
-},
-),
-prompt: PromptData {
-template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
-max_bytes: Some(
-400,
-),
 },
-quantized: None,
+),
+prompt: PromptData {
+template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
+max_bytes: Some(
+400,
+),
+},
+quantized: None,
 },
-user_provided: RoaringBitmap<[]>,
+fragments: FragmentConfigs(
+[],
+),
 },
-]
-"###);
+]
+"###);
 }
 
 #[test]
@@ -800,7 +870,7 @@ fn delete_embedder_with_user_provided_vectors() {
 .unwrap()
 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
 .collect::<Vec<_>>();
-snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
+snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###);
 }
 
 {
@@ -835,6 +905,6 @@ fn delete_embedder_with_user_provided_vectors() {
 .collect::<Vec<_>>();
 
 // FIXME: redaction
-snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
+snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###);
 }
 }
@@ -37,6 +37,7 @@ pub(crate) enum FailureLocation {
 InsideCreateBatch,
 InsideProcessBatch,
 PanicInsideProcessBatch,
+ProcessExport,
 ProcessUpgrade,
 AcquiringWtxn,
 UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
@@ -1,7 +1,9 @@
 //! Utility functions on the DBs. Mainly getter and setters.
 
+use crate::milli::progress::EmbedderStats;
 use std::collections::{BTreeSet, HashSet};
 use std::ops::Bound;
+use std::sync::Arc;
 
 use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
 use meilisearch_types::heed::{Database, RoTxn, RwTxn};
@@ -27,6 +29,7 @@ pub struct ProcessingBatch {
 pub uid: BatchId,
 pub details: DetailsView,
 pub stats: BatchStats,
+pub embedder_stats: Arc<EmbedderStats>,
 
 pub statuses: HashSet<Status>,
 pub kinds: HashSet<Kind>,
@@ -48,6 +51,7 @@ impl ProcessingBatch {
 uid,
 details: DetailsView::default(),
 stats: BatchStats::default(),
+embedder_stats: Default::default(),
 
 statuses,
 kinds: HashSet::default(),
@@ -146,6 +150,7 @@ impl ProcessingBatch {
 progress: None,
 details: self.details.clone(),
 stats: self.stats.clone(),
+embedder_stats: self.embedder_stats.as_ref().into(),
 started_at: self.started_at,
 finished_at: self.finished_at,
 enqueued_at: self.enqueued_at,
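
These three hunks add an `Arc<EmbedderStats>` to `ProcessingBatch`: the batch and the embedding workers share one stats block, and its snapshot is folded into the batch view when the batch finishes. A self-contained sketch of that sharing pattern, assuming atomic counters inside `EmbedderStats` (the real field layout is not shown in this diff):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

// Illustrative counters; the actual EmbedderStats in milli may differ.
#[derive(Default)]
struct EmbedderStats {
    total_requests: AtomicUsize,
    failed_requests: AtomicUsize,
}

fn main() {
    let stats = Arc::new(EmbedderStats::default());

    // A worker thread records an embedder call against the shared block.
    let worker_stats = Arc::clone(&stats);
    std::thread::spawn(move || {
        worker_stats.total_requests.fetch_add(1, Ordering::Relaxed);
    })
    .join()
    .unwrap();

    // The batch owner reads the same counters when finalizing the batch.
    assert_eq!(stats.total_requests.load(Ordering::Relaxed), 1);
    assert_eq!(stats.failed_requests.load(Ordering::Relaxed), 0);
}
```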
@@ -273,6 +278,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
 K::TaskCancelation { .. }
 | K::TaskDeletion { .. }
 | K::DumpCreation { .. }
+| K::Export { .. }
 | K::UpgradeDatabase { .. }
 | K::SnapshotCreation => (),
 };
@@ -600,6 +606,9 @@ impl crate::IndexScheduler {
 Details::Dump { dump_uid: _ } => {
 assert_eq!(kind.as_kind(), Kind::DumpCreation);
 }
+Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
+assert_eq!(kind.as_kind(), Kind::Export);
+}
 Details::UpgradeDatabase { from: _, to: _ } => {
 assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
 }
@@ -15,7 +15,7 @@ license.workspace = true
 serde_json = "1.0"
 
 [dev-dependencies]
-criterion = "0.5.1"
+criterion = "0.6.0"
 
 [[bench]]
 name = "depth"
@@ -14,6 +14,6 @@ license.workspace = true
 # fixed version due to format breakages in v1.40
 insta = { version = "=1.39.0", features = ["json", "redactions"] }
 md5 = "0.7.0"
-once_cell = "1.20"
+once_cell = "1.21"
 regex-lite = "0.1.6"
 uuid = { version = "1.17.0", features = ["v4"] }
@ -17,10 +17,10 @@ hmac = "0.12.1"
|
|||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
roaring = { version = "0.10.10", features = ["serde"] }
|
roaring = { version = "0.10.12", features = ["serde"] }
|
||||||
serde = { version = "1.0.217", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||||
sha2 = "0.10.8"
|
sha2 = "0.10.9"
|
||||||
thiserror = "2.0.9"
|
thiserror = "2.0.12"
|
||||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||||
|
@@ -158,7 +158,7 @@ impl AuthController {
 self.store.delete_all_keys()
 }
 
-/// Delete all the keys in the DB.
+/// Insert a key directly into the store.
 pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
 self.store.put_api_key(key)?;
 Ok(())
|
|||||||
|
|
||||||
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
||||||
store.put_api_key(Key::default_chat())?;
|
store.put_api_key(Key::default_chat())?;
|
||||||
|
store.put_api_key(Key::default_read_only_admin())?;
|
||||||
store.put_api_key(Key::default_admin())?;
|
store.put_api_key(Key::default_admin())?;
|
||||||
store.put_api_key(Key::default_search())?;
|
store.put_api_key(Key::default_search())?;
|
||||||
|
|
||||||
|
@@ -88,7 +88,13 @@ impl HeedAuthStore {
 let mut actions = HashSet::new();
 for action in &key.actions {
 match action {
-Action::All => actions.extend(enum_iterator::all::<Action>()),
+Action::All => {
+actions.extend(enum_iterator::all::<Action>());
+actions.remove(&Action::AllGet);
+}
+Action::AllGet => {
+actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
+}
 Action::DocumentsAll => {
 actions.extend(
 [Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]
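
The hunk above introduces an `AllGet` wildcard: `All` now explicitly excludes `AllGet` from its expansion, while `AllGet` expands to every read-only action. A self-contained mimic of that expansion logic; the tiny enum and `is_read` predicate are stand-ins for the real, much larger `Action` enum:

```rust
use std::collections::HashSet;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Action {
    All,
    AllGet,
    DocumentsGet,
    DocumentsAdd,
}

impl Action {
    // Stand-in for enum_iterator::all::<Action>().
    fn all() -> [Action; 4] {
        [Action::All, Action::AllGet, Action::DocumentsGet, Action::DocumentsAdd]
    }
    // Stand-in predicate: true for read-only actions.
    fn is_read(&self) -> bool {
        matches!(self, Action::AllGet | Action::DocumentsGet)
    }
}

fn expand(key_actions: &[Action]) -> HashSet<Action> {
    let mut actions = HashSet::new();
    for action in key_actions {
        match action {
            Action::All => {
                // Grant everything, but never the read-only wildcard itself.
                actions.extend(Action::all());
                actions.remove(&Action::AllGet);
            }
            Action::AllGet => {
                actions.extend(Action::all().iter().copied().filter(|a| a.is_read()))
            }
            a => {
                actions.insert(*a);
            }
        }
    }
    actions
}

fn main() {
    assert!(!expand(&[Action::All]).contains(&Action::AllGet));
    assert!(expand(&[Action::AllGet]).contains(&Action::DocumentsGet));
    assert!(!expand(&[Action::AllGet]).contains(&Action::DocumentsAdd));
}
```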
@@ -11,37 +11,38 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-actix-web = { version = "4.9.0", default-features = false }
-anyhow = "1.0.95"
-bumpalo = "3.16.0"
+actix-web = { version = "4.11.0", default-features = false }
+anyhow = "1.0.98"
+bumpalo = "3.18.1"
 bumparaw-collections = "0.1.4"
-convert_case = "0.6.0"
+byte-unit = { version = "5.1.6", features = ["serde"] }
+convert_case = "0.8.0"
 csv = "1.3.1"
 deserr = { version = "0.6.3", features = ["actix-web"] }
-either = { version = "1.13.0", features = ["serde"] }
+either = { version = "1.15.0", features = ["serde"] }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.35"
+flate2 = "1.1.2"
 fst = "0.4.7"
 memmap2 = "0.9.5"
 milli = { path = "../milli" }
-roaring = { version = "0.10.10", features = ["serde"] }
-rustc-hash = "2.1.0"
-serde = { version = "1.0.217", features = ["derive"] }
+roaring = { version = "0.10.12", features = ["serde"] }
+rustc-hash = "2.1.1"
+serde = { version = "1.0.219", features = ["derive"] }
 serde-cs = "0.2.4"
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
-tar = "0.4.43"
-tempfile = "3.15.0"
-thiserror = "2.0.9"
-time = { version = "0.3.37", features = [
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
+tar = "0.4.44"
+tempfile = "3.20.0"
+thiserror = "2.0.12"
+time = { version = "0.3.41", features = [
 "serde-well-known",
 "formatting",
 "parsing",
 "macros",
 ] }
-tokio = "1.43"
-utoipa = { version = "5.3.1", features = ["macros"] }
-uuid = { version = "1.11.0", features = ["serde", "v4"] }
+tokio = "1.45"
+utoipa = { version = "5.4.0", features = ["macros"] }
+uuid = { version = "1.17.0", features = ["serde", "v4"] }
 
 [dev-dependencies]
 # fixed version due to format breakages in v1.40
@@ -3,7 +3,7 @@ use serde::Serialize;
 use time::{Duration, OffsetDateTime};
 use utoipa::ToSchema;

-use crate::batches::{Batch, BatchId, BatchStats};
+use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView};
 use crate::task_view::DetailsView;
 use crate::tasks::serialize_duration;

@@ -14,7 +14,7 @@ pub struct BatchView {
     pub uid: BatchId,
     pub progress: Option<ProgressView>,
     pub details: DetailsView,
-    pub stats: BatchStats,
+    pub stats: BatchStatsView,
     #[serde(serialize_with = "serialize_duration", default)]
     pub duration: Option<Duration>,
     #[serde(with = "time::serde::rfc3339", default)]
@@ -25,13 +25,26 @@ pub struct BatchView {
     pub batch_strategy: String,
 }

+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct BatchStatsView {
+    #[serde(flatten)]
+    pub stats: BatchStats,
+    #[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
+    pub embedder_requests: EmbedderStatsView,
+}
+
 impl BatchView {
     pub fn from_batch(batch: &Batch) -> Self {
         Self {
             uid: batch.uid,
             progress: batch.progress.clone(),
             details: batch.details.clone(),
-            stats: batch.stats.clone(),
+            stats: BatchStatsView {
+                stats: batch.stats.clone(),
+                embedder_requests: batch.embedder_stats.clone(),
+            },
             duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
             started_at: batch.started_at,
             finished_at: batch.finished_at,
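The new `BatchStatsView` wrapper relies on `#[serde(flatten)]` to keep the wire format backward compatible: the existing stats keys stay at the top level of the `stats` object, and `embedderRequests` appears as a sibling rather than nesting the old fields. A minimal sketch with stand-in structs (not the real `BatchStats`):

```rust
// Demonstrates the serialization shape produced by a flattened stats view.
use serde::Serialize;

#[derive(Serialize)]
struct BatchStats {
    total_nb_tasks: u64,
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct BatchStatsView {
    #[serde(flatten)]
    stats: BatchStats,
    embedder_requests: u64,
}

fn main() {
    let view = BatchStatsView { stats: BatchStats { total_nb_tasks: 3 }, embedder_requests: 2 };
    // Prints {"total_nb_tasks":3,"embedderRequests":2} — old keys untouched,
    // the new counter simply added alongside them.
    println!("{}", serde_json::to_string(&view).unwrap());
}
```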
@@ -1,6 +1,6 @@
 use std::collections::BTreeMap;

-use milli::progress::ProgressView;
+use milli::progress::{EmbedderStats, ProgressView};
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 use utoipa::ToSchema;
@@ -19,6 +19,8 @@ pub struct Batch {
     pub progress: Option<ProgressView>,
     pub details: DetailsView,
     pub stats: BatchStats,
+    #[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
+    pub embedder_stats: EmbedderStatsView,

     #[serde(with = "time::serde::rfc3339")]
     pub started_at: OffsetDateTime,
@@ -43,6 +45,7 @@ impl PartialEq for Batch {
             progress,
             details,
             stats,
+            embedder_stats,
             started_at,
             finished_at,
             enqueued_at,
@@ -53,6 +56,7 @@ impl PartialEq for Batch {
             && progress.is_none() == other.progress.is_none()
             && details == &other.details
             && stats == &other.stats
+            && embedder_stats == &other.embedder_stats
             && started_at == &other.started_at
             && finished_at == &other.finished_at
             && enqueued_at == &other.enqueued_at
@@ -83,3 +87,30 @@ pub struct BatchStats {
     #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
     pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
 }
+
+#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct EmbedderStatsView {
+    pub total: usize,
+    pub failed: usize,
+    #[serde(skip_serializing_if = "Option::is_none", default)]
+    pub last_error: Option<String>,
+}
+
+impl From<&EmbedderStats> for EmbedderStatsView {
+    fn from(stats: &EmbedderStats) -> Self {
+        let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner());
+        Self {
+            total: stats.total_count.load(std::sync::atomic::Ordering::Relaxed),
+            failed: errors.1 as usize,
+            last_error: errors.0.clone(),
+        }
+    }
+}
+
+impl EmbedderStatsView {
+    pub fn skip_serializing(&self) -> bool {
+        self.total == 0 && self.failed == 0 && self.last_error.is_none()
+    }
+}
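One detail worth calling out in the `From<&EmbedderStats>` impl above is `read().unwrap_or_else(|p| p.into_inner())`: it reads through a poisoned `RwLock` instead of panicking, which is the right trade-off for best-effort statistics. A standalone illustration of that pattern:

```rust
// If a writer panics while holding the lock, `read()` returns a
// `PoisonError`; `into_inner()` recovers the guard and the data anyway.
use std::sync::{Arc, RwLock};
use std::thread;

fn main() {
    let errors: Arc<RwLock<(Option<String>, u32)>> = Arc::new(RwLock::new((None, 0)));

    // Poison the lock by panicking while the write guard is held.
    let poisoner = Arc::clone(&errors);
    let _ = thread::spawn(move || {
        let _guard = poisoner.write().unwrap();
        panic!("embedder call failed mid-update");
    })
    .join();

    // A plain `unwrap()` would propagate the poison as a panic here.
    let guard = errors.read().unwrap_or_else(|poisoned| poisoned.into_inner());
    println!("failed so far: {}", guard.1);
}
```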
@@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
 MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
 MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
 InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -301,6 +302,7 @@ InvalidFacetSearchQuery , InvalidRequest , BAD_REQU
 InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
 FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
 InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
+InvalidSearchMedia , InvalidRequest , BAD_REQUEST ;
 InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
 InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
@@ -308,6 +310,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
 InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
 InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
+InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
@@ -389,6 +392,13 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
 InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
 EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
+// Export
+InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
+InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
+InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
+InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
+InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;
+InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ;
 // Experimental features - Chat Completions
 UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
 UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
@@ -406,6 +416,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
 InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
 InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
 InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
+InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
 InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
 InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
 }
@@ -457,6 +468,7 @@ impl ErrorCode for milli::Error {
             | UserError::MissingSourceForNested { .. }
             | UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
             UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
+            UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders,
             UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
             UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
             UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
@@ -466,7 +478,8 @@ impl ErrorCode for milli::Error {
             UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
             UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
             UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
-            UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
+            UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
+            UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
             UserError::InvalidSearchableAttribute { .. } => {
                 Code::InvalidSearchAttributesToSearchOn
             }
@@ -482,7 +495,8 @@ impl ErrorCode for milli::Error {
             UserError::InvalidVectorsMapType { .. }
             | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
             UserError::TooManyVectors(_, _) => Code::TooManyVectors,
-            UserError::SortError(_) => Code::InvalidSearchSort,
+            UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
+            UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
             UserError::InvalidMinTypoWordLenSetting(_, _) => {
                 Code::InvalidSettingsTypoTolerance
             }
@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};

 use crate::error::{Code, ResponseError};

-pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
+pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
 pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
-    "Search the database for relevant JSON documents using an optional query.";
+    "Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
 pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
+pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
 pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";

 #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -21,6 +22,7 @@ pub struct RuntimeTogglableFeatures {
     pub get_task_documents_route: bool,
     pub composite_embedders: bool,
     pub chat_completions: bool,
+    pub multimodal: bool,
 }

 #[derive(Default, Debug, Clone, Copy)]
@@ -114,7 +116,6 @@ pub enum ChatCompletionSource {
     OpenAi,
     AzureOpenAi,
     Mistral,
-    Gemini,
     VLlm,
 }

@@ -134,7 +135,6 @@ impl ChatCompletionSource {
             AzureOpenAi if Self::old_openai_model(model) => System,
             AzureOpenAi => Developer,
             Mistral => System,
-            Gemini => System,
             VLlm => System,
         }
     }
@@ -154,7 +154,6 @@ impl ChatCompletionSource {
         match self {
             OpenAi => Some("https://api.openai.com/v1/"),
             Mistral => Some("https://api.mistral.ai/v1/"),
-            Gemini => Some("https://generativelanguage.googleapis.com/v1beta/openai/"),
             AzureOpenAi | VLlm => None,
         }
     }
@@ -163,18 +162,31 @@ impl ChatCompletionSource {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub struct ChatCompletionPrompts {
+    #[serde(default)]
     pub system: String,
+    #[serde(default)]
     pub search_description: String,
+    #[serde(default)]
     pub search_q_param: String,
+    #[serde(default = "default_search_filter_param")]
+    pub search_filter_param: String,
+    #[serde(default)]
     pub search_index_uid_param: String,
 }

+/// This function is used for when the search_filter_param is
+/// not provided and this can happen when the database is in v1.15.
+fn default_search_filter_param() -> String {
+    DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string()
+}
+
 impl Default for ChatCompletionPrompts {
     fn default() -> Self {
         Self {
             system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
             search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
             search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
+            search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
             search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
         }
     }
 }
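The `#[serde(default = "default_search_filter_param")]` attribute above is what lets prompt settings persisted by v1.15, which predate the `searchFilterParam` field, still deserialize after an upgrade. A sketch of that behavior with a stand-in struct:

```rust
// Old payloads lack the new key entirely; without the field-level default,
// serde would reject them with a "missing field" error.
use serde::Deserialize;

const DEFAULT_FILTER_PROMPT: &str =
    "The search filter string used to find relevant documents in the index.";

fn default_search_filter_param() -> String {
    DEFAULT_FILTER_PROMPT.to_string()
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Prompts {
    #[serde(default)]
    system: String,
    #[serde(default = "default_search_filter_param")]
    search_filter_param: String,
}

fn main() {
    // A v1.15-era payload: no `searchFilterParam` key at all.
    let old = r#"{ "system": "You are a research assistant." }"#;
    let prompts: Prompts = serde_json::from_str(old).unwrap();
    assert_eq!(prompts.search_filter_param, DEFAULT_FILTER_PROMPT);
}
```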
@@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError};

 /// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
 /// bytes long and optionally ending with a *.
-#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
 pub struct IndexUidPattern(String);
@@ -144,6 +144,21 @@ impl Key {
         }
     }

+    pub fn default_read_only_admin() -> Self {
+        let now = OffsetDateTime::now_utc();
+        let uid = Uuid::new_v4();
+        Self {
+            name: Some("Default Read-Only Admin API Key".to_string()),
+            description: Some("Use it to read information across the whole database. Caution! Do not expose this key on a public frontend".to_string()),
+            uid,
+            actions: vec![Action::AllGet, Action::KeysGet],
+            indexes: vec![IndexUidPattern::all()],
+            expires_at: None,
+            created_at: now,
+            updated_at: now,
+        }
+    }
+
     pub fn default_search() -> Self {
         let now = OffsetDateTime::now_utc();
         let uid = Uuid::new_v4();
@@ -218,6 +233,9 @@ pub enum Action {
     #[serde(rename = "*")]
     #[deserr(rename = "*")]
     All = 0,
+    #[serde(rename = "*.get")]
+    #[deserr(rename = "*.get")]
+    AllGet,
     #[serde(rename = "search")]
     #[deserr(rename = "search")]
     Search,
@@ -317,6 +335,9 @@ pub enum Action {
     #[serde(rename = "experimental.update")]
     #[deserr(rename = "experimental.update")]
     ExperimentalFeaturesUpdate,
+    #[serde(rename = "export")]
+    #[deserr(rename = "export")]
+    Export,
     #[serde(rename = "network.get")]
     #[deserr(rename = "network.get")]
     NetworkGet,
@@ -396,6 +417,52 @@ impl Action {
         }
     }

+    /// Whether the action should be included in [Action::AllGet].
+    pub fn is_read(&self) -> bool {
+        use Action::*;
+
+        // It's using an exhaustive match to force the addition of new actions.
+        match self {
+            // Any action that expands to others must return false, as it wouldn't be able to expand recursively.
+            All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
+            | StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll => false,
+
+            Search => true,
+            DocumentsAdd => false,
+            DocumentsGet => true,
+            DocumentsDelete => false,
+            Export => true,
+            IndexesAdd => false,
+            IndexesGet => true,
+            IndexesUpdate => false,
+            IndexesDelete => false,
+            IndexesSwap => false,
+            TasksCancel => false,
+            TasksDelete => false,
+            TasksGet => true,
+            SettingsGet => true,
+            SettingsUpdate => false,
+            StatsGet => true,
+            MetricsGet => true,
+            DumpsCreate => false,
+            SnapshotsCreate => false,
+            Version => true,
+            KeysAdd => false,
+            KeysGet => false, // Disabled in order to prevent privilege escalation
+            KeysUpdate => false,
+            KeysDelete => false,
+            ExperimentalFeaturesGet => true,
+            ExperimentalFeaturesUpdate => false,
+            NetworkGet => true,
+            NetworkUpdate => false,
+            ChatCompletions => false, // Disabled because it might trigger generation of new chats
+            ChatsGet => true,
+            ChatsDelete => false,
+            ChatsSettingsGet => true,
+            ChatsSettingsUpdate => false,
+        }
+    }
+
     pub const fn repr(&self) -> u8 {
         *self as u8
     }
@@ -405,6 +472,7 @@ pub mod actions {
     use super::Action::*;

     pub(crate) const ALL: u8 = All.repr();
+    pub const ALL_GET: u8 = AllGet.repr();
     pub const SEARCH: u8 = Search.repr();
     pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
     pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
@@ -438,6 +506,8 @@ pub mod actions {
     pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
     pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();

+    pub const EXPORT: u8 = Export.repr();
+
     pub const NETWORK_GET: u8 = NetworkGet.repr();
     pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();

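The new actions are exposed to the API as the strings `*.get` and `export`. A round-trip sketch with a stand-in enum carrying the same serde renames (not the full `Action` type, which also has `deserr` attributes):

```rust
// Serde unit variants with `rename` serialize to and from plain strings.
use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum Action {
    #[serde(rename = "*")]
    All,
    #[serde(rename = "*.get")]
    AllGet,
    #[serde(rename = "export")]
    Export,
}

fn main() {
    assert_eq!(serde_json::to_string(&Action::AllGet).unwrap(), "\"*.get\"");
    let parsed: Action = serde_json::from_str("\"export\"").unwrap();
    assert_eq!(parsed, Action::Export);
}
```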
@@ -18,7 +18,7 @@ pub mod versioning;
 pub use milli::{heed, Index};
 use uuid::Uuid;
 pub use versioning::VERSION_FILE_NAME;
-pub use {milli, serde_cs};
+pub use {byte_unit, milli, serde_cs};

 pub type Document = serde_json::Map<String, serde_json::Value>;
 pub type InstanceUid = Uuid;
@@ -9,10 +9,11 @@ use std::str::FromStr;
 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
 use fst::IntoStreamer;
 use milli::disabled_typos_terms::DisabledTyposTerms;
-use milli::index::{IndexEmbeddingConfig, PrefixSearch};
+use milli::index::PrefixSearch;
 use milli::proximity::ProximityPrecision;
 pub use milli::update::ChatSettings;
 use milli::update::Setting;
+use milli::vector::db::IndexEmbeddingConfig;
 use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};
 use utoipa::ToSchema;
@@ -500,8 +501,11 @@ impl Settings<Unchecked> {
         let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
         for (name, config) in configs.iter_mut() {
             let config_to_check = std::mem::take(config);
-            let checked_config =
-                milli::update::validate_embedding_settings(config_to_check.inner, name)?;
+            let checked_config = milli::update::validate_embedding_settings(
+                config_to_check.inner,
+                name,
+                milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate,
+            )?;
             *config = SettingEmbeddingSettings { inner: checked_config };
         }
         self.embedders = Setting::Set(configs);
@@ -751,6 +755,7 @@ pub fn apply_settings_to_builder(
                 builder.reset_min_word_len_two_typos();
                 builder.reset_exact_words();
                 builder.reset_exact_attributes();
+                builder.reset_disable_on_numbers();
             }
             Setting::NotSet => (),
         }
@@ -910,6 +915,7 @@ pub fn settings(
     };

     let embedders: BTreeMap<_, _> = index
+        .embedding_configs()
         .embedding_configs(rtxn)?
         .into_iter()
         .map(|IndexEmbeddingConfig { name, config, .. }| {
@@ -968,6 +974,7 @@ pub fn settings(
     if let SecretPolicy::HideSecrets = secret_policy {
         settings.hide_secrets()
     }
+
     Ok(settings)
 }

@@ -1,3 +1,6 @@
+use std::collections::BTreeMap;
+
+use byte_unit::UnitType;
 use milli::Object;
 use serde::{Deserialize, Serialize};
 use time::{Duration, OffsetDateTime};
@@ -6,7 +9,9 @@ use utoipa::ToSchema;
 use crate::batches::BatchId;
 use crate::error::ResponseError;
 use crate::settings::{Settings, Unchecked};
-use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
+use crate::tasks::{
+    serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
+};

 #[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
@@ -118,6 +123,15 @@ pub struct DetailsView {
     pub upgrade_from: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub upgrade_to: Option<String>,
+    // exporting
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub url: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub api_key: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub payload_size: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
 }

 impl DetailsView {
@@ -238,6 +252,34 @@ impl DetailsView {
                     Some(left)
                 }
             },
+            url: match (self.url.clone(), other.url.clone()) {
+                (None, None) => None,
+                (None, Some(url)) | (Some(url), None) => Some(url),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            api_key: match (self.api_key.clone(), other.api_key.clone()) {
+                (None, None) => None,
+                (None, Some(key)) | (Some(key), None) => Some(key),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
+                (None, None) => None,
+                (None, Some(size)) | (Some(size), None) => Some(size),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
+            indexes: match (self.indexes.clone(), other.indexes.clone()) {
+                (None, None) => None,
+                (None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
+                // We should never be able to batch multiple exports at the same time.
+                // So we return the first one we encounter but that shouldn't be an issue anyway.
+                (Some(left), Some(_right)) => Some(left),
+            },
             // We want the earliest version
             upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
                 (None, None) => None,
@@ -327,6 +369,22 @@ impl From<Details> for DetailsView {
             Details::IndexSwap { swaps } => {
                 DetailsView { swaps: Some(swaps), ..Default::default() }
             }
+            Details::Export { url, api_key, payload_size, indexes } => DetailsView {
+                url: Some(url),
+                api_key: api_key.map(|mut api_key| {
+                    hide_secret(&mut api_key);
+                    api_key
+                }),
+                payload_size: payload_size
+                    .map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
+                indexes: Some(
+                    indexes
+                        .into_iter()
+                        .map(|(pattern, settings)| (pattern.to_string(), settings))
+                        .collect(),
+                ),
+                ..Default::default()
+            },
             Details::UpgradeDatabase { from, to } => DetailsView {
                 upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
                 upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
@@ -335,3 +393,21 @@ impl From<Details> for DetailsView {
         }
     }
 }
+
+// We definitely need to factorize the code to hide the secret key
+fn hide_secret(secret: &mut String) {
+    match secret.len() {
+        x if x < 10 => {
+            secret.replace_range(.., "XXX...");
+        }
+        x if x < 20 => {
+            secret.replace_range(2.., "XXXX...");
+        }
+        x if x < 30 => {
+            secret.replace_range(3.., "XXXXX...");
+        }
+        _x => {
+            secret.replace_range(5.., "XXXXXX...");
+        }
+    }
+}
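To see what `hide_secret` above actually produces, here is a standalone run of the same function over a few sample keys (sample inputs only): the number of preserved prefix characters grows with the secret's length, so short keys leak nothing and long keys keep a five-character hint.

```rust
// Same masking logic as the diff above, condensed to expression arms.
fn hide_secret(secret: &mut String) {
    match secret.len() {
        x if x < 10 => secret.replace_range(.., "XXX..."),
        x if x < 20 => secret.replace_range(2.., "XXXX..."),
        x if x < 30 => secret.replace_range(3.., "XXXXX..."),
        _ => secret.replace_range(5.., "XXXXXX..."),
    }
}

fn main() {
    for input in ["short", "sixteen-chars-ok", "this-secret-is-thirty-two-chars!"] {
        let mut s = input.to_string();
        hide_secret(&mut s);
        println!("{input:35} -> {s}");
    }
    // short                               -> XXX...
    // sixteen-chars-ok                    -> siXXXX...
    // this-secret-is-thirty-two-chars!    -> this-XXXXXX...
}
```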
@@ -1,19 +1,22 @@
 use core::fmt;
-use std::collections::HashSet;
+use std::collections::{BTreeMap, HashSet};
 use std::fmt::{Display, Write};
 use std::str::FromStr;

+use byte_unit::Byte;
 use enum_iterator::Sequence;
 use milli::update::IndexDocumentsMethod;
 use milli::Object;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize, Serializer};
+use serde_json::Value;
 use time::{Duration, OffsetDateTime};
-use utoipa::ToSchema;
+use utoipa::{schema, ToSchema};
 use uuid::Uuid;

 use crate::batches::BatchId;
 use crate::error::ResponseError;
+use crate::index_uid_pattern::IndexUidPattern;
 use crate::keys::Key;
 use crate::settings::{Settings, Unchecked};
 use crate::{versioning, InstanceUid};
@@ -50,6 +53,7 @@ impl Task {
             | SnapshotCreation
             | TaskCancelation { .. }
             | TaskDeletion { .. }
+            | Export { .. }
             | UpgradeDatabase { .. }
             | IndexSwap { .. } => None,
             DocumentAdditionOrUpdate { index_uid, .. }
@@ -86,6 +90,7 @@ impl Task {
             | KindWithContent::TaskDeletion { .. }
             | KindWithContent::DumpCreation { .. }
             | KindWithContent::SnapshotCreation
+            | KindWithContent::Export { .. }
             | KindWithContent::UpgradeDatabase { .. } => None,
         }
     }
@@ -108,11 +113,11 @@ pub enum KindWithContent {
     },
     DocumentDeletionByFilter {
         index_uid: String,
-        filter_expr: serde_json::Value,
+        filter_expr: Value,
     },
     DocumentEdition {
         index_uid: String,
-        filter_expr: Option<serde_json::Value>,
+        filter_expr: Option<Value>,
         context: Option<milli::Object>,
         function: String,
     },
@@ -152,6 +157,12 @@ pub enum KindWithContent {
         instance_uid: Option<InstanceUid>,
     },
     SnapshotCreation,
+    Export {
+        url: String,
+        api_key: Option<String>,
+        payload_size: Option<Byte>,
+        indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
+    },
     UpgradeDatabase {
         from: (u32, u32, u32),
     },
@@ -163,6 +174,13 @@ pub struct IndexSwap {
     pub indexes: (String, String),
 }

+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct ExportIndexSettings {
+    pub filter: Option<Value>,
+    pub override_settings: bool,
+}
+
 impl KindWithContent {
     pub fn as_kind(&self) -> Kind {
         match self {
@@ -180,6 +198,7 @@ impl KindWithContent {
             KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
             KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
             KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
+            KindWithContent::Export { .. } => Kind::Export,
             KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
         }
     }
@@ -192,6 +211,7 @@ impl KindWithContent {
             | SnapshotCreation
             | TaskCancelation { .. }
             | TaskDeletion { .. }
+            | Export { .. }
             | UpgradeDatabase { .. } => vec![],
             DocumentAdditionOrUpdate { index_uid, .. }
             | DocumentEdition { index_uid, .. }
@@ -269,6 +289,14 @@ impl KindWithContent {
             }),
             KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
             KindWithContent::SnapshotCreation => None,
+            KindWithContent::Export { url, api_key, payload_size, indexes } => {
+                Some(Details::Export {
+                    url: url.clone(),
+                    api_key: api_key.clone(),
+                    payload_size: *payload_size,
+                    indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
+                })
+            }
             KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
                 from: (from.0, from.1, from.2),
                 to: (
@@ -335,6 +363,14 @@ impl KindWithContent {
             }),
             KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
             KindWithContent::SnapshotCreation => None,
+            KindWithContent::Export { url, api_key, payload_size, indexes } => {
+                Some(Details::Export {
+                    url: url.clone(),
+                    api_key: api_key.clone(),
+                    payload_size: *payload_size,
+                    indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
+                })
+            }
             KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
                 from: *from,
                 to: (
@@ -383,6 +419,14 @@ impl From<&KindWithContent> for Option<Details> {
             }),
             KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
             KindWithContent::SnapshotCreation => None,
+            KindWithContent::Export { url, api_key, payload_size, indexes } => {
+                Some(Details::Export {
+                    url: url.clone(),
+                    api_key: api_key.clone(),
+                    payload_size: *payload_size,
+                    indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
+                })
+            }
             KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
                 from: *from,
                 to: (
@@ -499,6 +543,7 @@ pub enum Kind {
     TaskDeletion,
     DumpCreation,
     SnapshotCreation,
+    Export,
     UpgradeDatabase,
 }

@@ -516,6 +561,7 @@ impl Kind {
             | Kind::TaskCancelation
             | Kind::TaskDeletion
             | Kind::DumpCreation
+            | Kind::Export
             | Kind::UpgradeDatabase
             | Kind::SnapshotCreation => false,
         }
@@ -536,6 +582,7 @@ impl Display for Kind {
             Kind::TaskDeletion => write!(f, "taskDeletion"),
             Kind::DumpCreation => write!(f, "dumpCreation"),
             Kind::SnapshotCreation => write!(f, "snapshotCreation"),
+            Kind::Export => write!(f, "export"),
             Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
         }
     }
@@ -568,6 +615,8 @@ impl FromStr for Kind {
             Ok(Kind::DumpCreation)
         } else if kind.eq_ignore_ascii_case("snapshotCreation") {
             Ok(Kind::SnapshotCreation)
+        } else if kind.eq_ignore_ascii_case("export") {
+            Ok(Kind::Export)
         } else if kind.eq_ignore_ascii_case("upgradeDatabase") {
             Ok(Kind::UpgradeDatabase)
         } else {
@@ -643,12 +692,33 @@ pub enum Details {
     IndexSwap {
         swaps: Vec<IndexSwap>,
     },
+    Export {
+        url: String,
+        api_key: Option<String>,
+        payload_size: Option<Byte>,
+        indexes: BTreeMap<IndexUidPattern, DetailsExportIndexSettings>,
+    },
     UpgradeDatabase {
         from: (u32, u32, u32),
         to: (u32, u32, u32),
     },
 }

+#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
+#[schema(rename_all = "camelCase")]
+pub struct DetailsExportIndexSettings {
+    #[serde(flatten)]
+    pub settings: ExportIndexSettings,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_documents: Option<u64>,
+}
+
+impl From<ExportIndexSettings> for DetailsExportIndexSettings {
+    fn from(settings: ExportIndexSettings) -> Self {
+        DetailsExportIndexSettings { settings, matched_documents: None }
+    }
+}
+
 impl Details {
     pub fn to_failed(&self) -> Self {
         let mut details = self.clone();
@@ -667,6 +737,7 @@ impl Details {
             Self::SettingsUpdate { .. }
             | Self::IndexInfo { .. }
             | Self::Dump { .. }
+            | Self::Export { .. }
             | Self::UpgradeDatabase { .. }
             | Self::IndexSwap { .. } => (),
         }
     }
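The new `Kind::Export` round-trips between the enum and its camelCase wire string through the `Display` and `FromStr` arms added above, with `FromStr` matching case-insensitively. A miniature with a stand-in two-variant enum (not the full `Kind` type):

```rust
// Display renders the task-kind string; FromStr parses it back, any case.
use std::fmt;
use std::str::FromStr;

#[derive(Debug, PartialEq)]
enum Kind {
    Export,
    UpgradeDatabase,
}

impl fmt::Display for Kind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Kind::Export => write!(f, "export"),
            Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
        }
    }
}

impl FromStr for Kind {
    type Err = String;

    fn from_str(kind: &str) -> Result<Self, Self::Err> {
        if kind.eq_ignore_ascii_case("export") {
            Ok(Kind::Export)
        } else if kind.eq_ignore_ascii_case("upgradeDatabase") {
            Ok(Kind::UpgradeDatabase)
        } else {
            Err(format!("unknown task kind: {kind}"))
        }
    }
}

fn main() {
    assert_eq!(Kind::Export.to_string(), "export");
    assert_eq!("EXPORT".parse::<Kind>().unwrap(), Kind::Export);
}
```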
@@ -13,51 +13,50 @@ license.workspace = true
 default-run = "meilisearch"

 [dependencies]
-actix-cors = "0.7.0"
+actix-cors = "0.7.1"
-actix-http = { version = "3.9.0", default-features = false, features = [
+actix-http = { version = "3.11.0", default-features = false, features = [
     "compress-brotli",
     "compress-gzip",
     "rustls-0_23",
 ] }
 actix-utils = "3.0.1"
-actix-web = { version = "4.9.0", default-features = false, features = [
+actix-web = { version = "4.11.0", default-features = false, features = [
     "macros",
     "compress-brotli",
     "compress-gzip",
     "cookies",
     "rustls-0_23",
 ] }
-anyhow = { version = "1.0.95", features = ["backtrace"] }
+anyhow = { version = "1.0.98", features = ["backtrace"] }
-async-trait = "0.1.85"
-bstr = "1.11.3"
+bstr = "1.12.0"
 byte-unit = { version = "5.1.6", features = ["serde"] }
-bytes = "1.9.0"
+bytes = "1.10.1"
-bumpalo = "3.16.0"
+bumpalo = "3.18.1"
-clap = { version = "4.5.24", features = ["derive", "env"] }
+clap = { version = "4.5.40", features = ["derive", "env"] }
 crossbeam-channel = "0.5.15"
 deserr = { version = "0.6.3", features = ["actix-web"] }
 dump = { path = "../dump" }
-either = "1.13.0"
+either = "1.15.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.35"
+flate2 = "1.1.2"
 fst = "0.4.7"
 futures = "0.3.31"
 futures-util = "0.3.31"
 index-scheduler = { path = "../index-scheduler" }
-indexmap = { version = "2.7.0", features = ["serde"] }
+indexmap = { version = "2.9.0", features = ["serde"] }
-is-terminal = "0.4.13"
+is-terminal = "0.4.16"
 itertools = "0.14.0"
-jsonwebtoken = "9.3.0"
+jsonwebtoken = "9.3.1"
 lazy_static = "1.5.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-mimalloc = { version = "0.1.43", default-features = false }
+mimalloc = { version = "0.1.47", default-features = false }
 mime = "0.3.17"
-num_cpus = "1.16.0"
+num_cpus = "1.17.0"
 obkv = "0.3.0"
-once_cell = "1.20.2"
+once_cell = "1.21.3"
-ordered-float = "4.6.0"
+ordered-float = "5.0.0"
-parking_lot = "0.12.3"
+parking_lot = "0.12.4"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.16"
 platform-dirs = "0.3.0"
@@ -65,44 +64,44 @@ prometheus = { version = "0.14.0", features = ["process"] }
 rand = "0.8.5"
 rayon = "1.10.0"
 regex = "1.11.1"
-reqwest = { version = "0.12.12", features = [
+reqwest = { version = "0.12.20", features = [
     "rustls-tls",
     "json",
 ], default-features = false }
-rustls = { version = "0.23.20", features = ["ring"], default-features = false }
+rustls = { version = "0.23.28", features = ["ring"], default-features = false }
-rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
+rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
 rustls-pemfile = "2.2.0"
-segment = { version = "0.2.5" }
+segment = { version = "0.2.6" }
-serde = { version = "1.0.217", features = ["derive"] }
+serde = { version = "1.0.219", features = ["derive"] }
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
-sha2 = "0.10.8"
+sha2 = "0.10.9"
 siphasher = "1.0.1"
 slice-group-by = "0.3.1"
-static-files = { version = "0.2.4", optional = true }
+static-files = { version = "0.2.5", optional = true }
-sysinfo = "0.33.1"
+sysinfo = "0.35.2"
-tar = "0.4.43"
+tar = "0.4.44"
-tempfile = "3.15.0"
+tempfile = "3.20.0"
-thiserror = "2.0.9"
+thiserror = "2.0.12"
-time = { version = "0.3.37", features = [
+time = { version = "0.3.41", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-tokio = { version = "1.43.1", features = ["full"] }
+tokio = { version = "1.45.1", features = ["full"] }
-toml = "0.8.19"
+toml = "0.8.23"
-uuid = { version = "1.11.0", features = ["serde", "v4"] }
+uuid = { version = "1.17.0", features = ["serde", "v4"] }
 serde_urlencoded = "0.7.1"
 termcolor = "1.4.1"
 url = { version = "2.5.4", features = ["serde"] }
 tracing = "0.1.41"
 tracing-subscriber = { version = "0.3.19", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
-tracing-actix-web = "0.7.15"
+tracing-actix-web = "0.7.18"
 build-info = { version = "1.7.0", path = "../build-info" }
-roaring = "0.10.10"
+roaring = "0.10.12"
 mopa-maintained = "0.2.3"
-utoipa = { version = "5.3.1", features = [
+utoipa = { version = "5.4.0", features = [
     "actix_extras",
     "macros",
     "non_strict_integers",
@@ -118,7 +117,7 @@ actix-web-lab = { version = "0.24.1", default-features = false }

 [dev-dependencies]
 actix-rt = "2.10.0"
-brotli = "6.0.0"
+brotli = "8.0.1"
 # fixed version due to format breakages in v1.40
 insta = { version = "=1.39.0", features = ["redactions"] }
 manifest-dir-macros = "0.1.18"
@@ -126,21 +125,21 @@ maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
 temp-env = "0.3.6"
 urlencoding = "2.1.3"
-wiremock = "0.6.2"
+wiremock = "0.6.3"
 yaup = "0.3.1"

 [build-dependencies]
-anyhow = { version = "1.0.95", optional = true }
+anyhow = { version = "1.0.98", optional = true }
-cargo_toml = { version = "0.21.0", optional = true }
+cargo_toml = { version = "0.22.1", optional = true }
 hex = { version = "0.4.3", optional = true }
-reqwest = { version = "0.12.12", features = [
+reqwest = { version = "0.12.20", features = [
     "blocking",
     "rustls-tls",
 ], default-features = false, optional = true }
 sha-1 = { version = "0.10.1", optional = true }
-static-files = { version = "0.2.4", optional = true }
+static-files = { version = "0.2.5", optional = true }
-tempfile = { version = "3.15.0", optional = true }
+tempfile = { version = "3.20.0", optional = true }
-zip = { version = "2.3.0", optional = true }
+zip = { version = "4.1.0", optional = true }

 [features]
|
||||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||||
@ -170,5 +169,5 @@ german = ["meilisearch-types/german"]
|
|||||||
turkish = ["meilisearch-types/turkish"]
|
turkish = ["meilisearch-types/turkish"]
|
||||||
|
|
||||||
[package.metadata.mini-dashboard]
|
[package.metadata.mini-dashboard]
|
||||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
|
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
|
||||||
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"
|
sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"
|
||||||
|
@@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
         _request: &HttpRequest,
     ) {
     }
-    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
-    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
 }
@@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
     PerFilter,
 }

-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
-}
-
 /// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
 pub trait Aggregate: 'static + mopa::Any + Send {
     /// The name of the event that will be sent to segment.
@@ -197,11 +197,13 @@ struct Infos {
     experimental_max_number_of_batched_tasks: usize,
     experimental_limit_batched_tasks_total_size: u64,
     experimental_network: bool,
+    experimental_multimodal: bool,
     experimental_chat_completions: bool,
     experimental_get_task_documents_route: bool,
     experimental_composite_embedders: bool,
     experimental_embedding_cache_entries: usize,
     experimental_no_snapshot_compaction: bool,
+    experimental_no_edition_2024_for_settings: bool,
     gpu_enabled: bool,
     db_path: bool,
     import_dump: bool,
@@ -286,8 +288,12 @@ impl Infos {
             ScheduleSnapshot::Enabled(interval) => Some(interval),
         };

-        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
-            indexer_options;
+        let IndexerOpts {
+            max_indexing_memory,
+            max_indexing_threads,
+            skip_index_budget: _,
+            experimental_no_edition_2024_for_settings,
+        } = indexer_options;

         let RuntimeTogglableFeatures {
             metrics,
@@ -298,6 +304,7 @@ impl Infos {
             get_task_documents_route,
             composite_embedders,
             chat_completions,
+            multimodal,
         } = features;

         // We're going to override every sensible information.
@@ -317,6 +324,7 @@ impl Infos {
             experimental_reduce_indexing_memory_usage,
             experimental_network: network,
             experimental_chat_completions: chat_completions,
+            experimental_multimodal: multimodal,
             experimental_get_task_documents_route: get_task_documents_route,
             experimental_composite_embedders: composite_embedders,
             experimental_embedding_cache_entries,
@@ -350,6 +358,7 @@ impl Infos {
             ssl_require_auth,
             ssl_resumption,
             ssl_tickets,
+            experimental_no_edition_2024_for_settings,
         }
     }
 }
@@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
     TooManySearchRequests(usize),
     #[error("Internal error: Search limiter is down.")]
     SearchLimiterIsDown,
-    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
+    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
     PayloadTooLarge(usize),
     #[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
         .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
@@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
     DocumentFormat(#[from] DocumentFormatError),
     #[error(transparent)]
     Join(#[from] JoinError),
-    #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
+    #[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
     MissingSearchHybrid,
+    #[error("Invalid request: both `media` and `vector` parameters are present.")]
+    MediaAndVector,
 }

 impl MeilisearchHttpError {
@@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
             MeilisearchHttpError::Join(_) => Code::Internal,
             MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
+            MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
             MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
                 Code::InvalidMultiSearchFederationOptions
             }
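The `PayloadTooLarge` change above switches the displayed unit depending on whether the configured limit is a clean multiple of 1024. As a minimal sketch of the `byte_unit` API involved (assuming the 5.x crate series; this snippet is illustrative, not Meilisearch code):

use byte_unit::{Byte, UnitType};

fn main() {
    // Multiples of 1024 render cleanly with binary units (KiB/MiB/GiB)...
    let limit = Byte::from_u64(10 * 1024 * 1024);
    println!("{}", limit.get_appropriate_unit(UnitType::Binary)); // "10 MiB"

    // ...while arbitrary byte counts read better with decimal units (KB/MB/GB).
    let odd = Byte::from_u64(10_000_000);
    println!("{}", odd.get_appropriate_unit(UnitType::Decimal)); // "10 MB"
}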
@@ -37,6 +37,7 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
 use meilisearch_auth::{open_auth_store_env, AuthController};
 use meilisearch_types::milli::constants::VERSION_MAJOR;
 use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
+use meilisearch_types::milli::progress::{EmbedderStats, Progress};
 use meilisearch_types::milli::update::{
     default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
 };
@@ -463,6 +464,7 @@ fn import_dump(
     index_scheduler: &mut IndexScheduler,
     auth: &mut AuthController,
 ) -> Result<(), anyhow::Error> {
+    let progress = Progress::default();
     let reader = File::open(dump_path)?;
     let mut dump_reader = dump::DumpReader::open(reader)?;

@@ -496,14 +498,20 @@ fn import_dump(
         keys.push(key);
     }

-    // 3. Import the runtime features and network
+    // 3. Import the `ChatCompletionSettings`s.
+    for result in dump_reader.chat_completions_settings()? {
+        let (name, settings) = result?;
+        index_scheduler.put_chat_settings(&name, &settings)?;
+    }
+
+    // 4. Import the runtime features and network
     let features = dump_reader.features()?.unwrap_or_default();
     index_scheduler.put_runtime_features(features)?;

     let network = dump_reader.network()?.cloned().unwrap_or_default();
     index_scheduler.put_network(network)?;

-    // 3.1 Use all cpus to process dump if `max_indexing_threads` not configured
+    // 4.1 Use all cpus to process dump if `max_indexing_threads` not configured
     let backup_config;
     let base_config = index_scheduler.indexer_config();

@@ -520,7 +528,7 @@ fn import_dump(
     // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
     // try to process tasks while we're trying to import the indexes.

-    // 4. Import the indexes.
+    // 5. Import the indexes.
     for index_reader in dump_reader.indexes()? {
         let mut index_reader = index_reader?;
         let metadata = index_reader.metadata();
@@ -533,20 +541,20 @@ fn import_dump(
         let mut wtxn = index.write_txn()?;

         let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
-        // 4.1 Import the primary key if there is one.
+        // 5.1 Import the primary key if there is one.
         if let Some(ref primary_key) = metadata.primary_key {
             builder.set_primary_key(primary_key.to_string());
         }

-        // 4.2 Import the settings.
+        // 5.2 Import the settings.
         tracing::info!("Importing the settings.");
         let settings = index_reader.settings()?;
         apply_settings_to_builder(&settings, &mut builder);
-        builder
-            .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
+        let embedder_stats: Arc<EmbedderStats> = Default::default();
+        builder.execute(&|| false, &progress, embedder_stats.clone())?;

-        // 4.3 Import the documents.
-        // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
+        // 5.3 Import the documents.
+        // 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
         tracing::info!("Importing the documents.");
         let file = tempfile::tempfile()?;
         let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
@@ -557,11 +565,11 @@ fn import_dump(
         // This flush the content of the batch builder.
         let file = builder.into_inner()?.into_inner()?;

-        // 4.3.2 We feed it to the milli index.
+        // 5.3.2 We feed it to the milli index.
         let reader = BufReader::new(file);
         let reader = DocumentsBatchReader::from_reader(reader)?;

-        let embedder_configs = index.embedding_configs(&wtxn)?;
+        let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
         let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;

         let builder = milli::update::IndexDocuments::new(
@@ -574,6 +582,7 @@ fn import_dump(
             },
             |indexing_step| tracing::trace!("update: {:?}", indexing_step),
             || false,
+            &embedder_stats,
         )?;

         let builder = builder.with_embedders(embedders);
@@ -588,15 +597,15 @@ fn import_dump(
         index_scheduler.refresh_index_stats(&uid)?;
     }

-    // 5. Import the queue
+    // 6. Import the queue
     let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
-    // 5.1. Import the batches
+    // 6.1. Import the batches
     for ret in dump_reader.batches()? {
         let batch = ret?;
         index_scheduler_dump.register_dumped_batch(batch)?;
     }

-    // 5.2. Import the tasks
+    // 6.2. Import the tasks
     for ret in dump_reader.tasks()? {
         let (task, file) = ret?;
         index_scheduler_dump.register_dumped_task(task, file)?;
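The new step 3 above iterates over fallible dump entries: each item is a `Result`, and the `?` inside the loop aborts the whole import on the first corrupted entry. A self-contained sketch of that pattern with hypothetical stand-in types (not the real dump-reader API):

// `Settings` and `read_entries` are stand-ins for illustration only.
#[derive(Debug)]
struct Settings {
    prompt: String,
}

fn read_entries() -> Vec<Result<(String, Settings), std::io::Error>> {
    vec![Ok(("default".into(), Settings { prompt: "You are...".into() }))]
}

fn import() -> Result<(), std::io::Error> {
    for result in read_entries() {
        let (name, settings) = result?; // stop at the first broken entry
        println!("importing chat workspace {name}: {settings:?}");
    }
    Ok(())
}

fn main() {
    import().unwrap();
}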
@@ -15,6 +15,33 @@ lazy_static! {
         "Meilisearch number of degraded search requests"
     ))
     .expect("Can't create a metric");
+    pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
+        opts!(
+            "meilisearch_chat_search_requests",
+            "Meilisearch number of search requests performed by the chat route itself"
+        ),
+        &["type"]
+    )
+    .expect("Can't create a metric");
+    pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
+        &["workspace", "model"]
+    )
+    .expect("Can't create a metric");
+    pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
+        register_int_counter_vec!(
+            opts!(
+                "meilisearch_chat_completion_tokens_usage",
+                "Meilisearch Chat Completion Tokens Usage"
+            ),
+            &["workspace", "model"]
+        )
+        .expect("Can't create a metric");
+    pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
+        &["workspace", "model"]
+    )
+    .expect("Can't create a metric");
     pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
         register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
             .expect("Can't create a metric");
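The counters registered above follow the standard `prometheus` crate pattern: one `IntCounterVec` per metric, with one time series per distinct label combination. A minimal, self-contained sketch of how such a counter behaves (the metric and label names mirror the diff; the snippet itself is illustrative, not Meilisearch code):

use prometheus::{opts, register_int_counter_vec, Encoder, TextEncoder};

fn main() {
    // Same shape as MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE above:
    // one counter per (workspace, model) label pair.
    let tokens = register_int_counter_vec!(
        opts!("chat_prompt_tokens_usage", "Prompt tokens used"),
        &["workspace", "model"]
    )
    .expect("Can't create a metric");

    // Each distinct label combination gets its own time series.
    tokens.with_label_values(&["default", "gpt-4o-mini"]).inc_by(128);
    tokens.with_label_values(&["default", "mistral-small"]).inc_by(42);

    // Render the default registry in the text format scraped from GET /metrics.
    let mut buf = Vec::new();
    TextEncoder::new().encode(&prometheus::gather(), &mut buf).unwrap();
    println!("{}", String::from_utf8(buf).unwrap());
}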
@@ -53,6 +53,8 @@ const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_U
 const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
 const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
 const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
+const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
+    "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
 const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -62,7 +64,7 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
 const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
     "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
 const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
-    "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
+    "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE";
 const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
     "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
 const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
@@ -749,12 +751,25 @@ pub struct IndexerOpts {
     #[clap(skip)]
     #[serde(skip)]
     pub skip_index_budget: bool,

+    /// Experimental no edition 2024 for settings feature. For more information,
+    /// see: <https://github.com/orgs/meilisearch/discussions/847>
+    ///
+    /// Enables the experimental no edition 2024 for settings feature.
+    #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
+    #[serde(default)]
+    pub experimental_no_edition_2024_for_settings: bool,
 }

 impl IndexerOpts {
     /// Exports the values to their corresponding env vars if they are not set.
     pub fn export_to_env(self) {
-        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
+        let IndexerOpts {
+            max_indexing_memory,
+            max_indexing_threads,
+            skip_index_budget: _,
+            experimental_no_edition_2024_for_settings,
+        } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
                 MEILI_MAX_INDEXING_MEMORY,
@@ -767,6 +782,12 @@ impl IndexerOpts {
                 max_indexing_threads.to_string(),
             );
         }
+        if experimental_no_edition_2024_for_settings {
+            export_to_env_if_not_present(
+                MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS,
+                experimental_no_edition_2024_for_settings.to_string(),
+            );
+        }
     }
 }

@@ -785,7 +806,12 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             max_threads: *other.max_indexing_threads,
             max_positions_per_attributes: None,
             skip_index_budget: other.skip_index_budget,
-            ..Default::default()
+            experimental_no_edition_2024_for_settings: other
+                .experimental_no_edition_2024_for_settings,
+            chunk_compression_type: Default::default(),
+            chunk_compression_level: Default::default(),
+            documents_chunk_size: Default::default(),
+            max_nb_chunks: Default::default(),
         })
     }
 }
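The `export_to_env` flow above relies on `export_to_env_if_not_present`, which by its name only writes a variable that is not already set, so values exported by the user keep priority over CLI/config values. A minimal sketch of that assumed behavior (the helper body here is a guess, not the Meilisearch implementation):

use std::env;

// Assumed behavior: keep any value the user already exported,
// otherwise materialize the CLI/config value into the environment.
fn export_to_env_if_not_present(key: &str, value: String) {
    if env::var_os(key).is_none() {
        env::set_var(key, value);
    }
}

fn main() {
    export_to_env_if_not_present("MEILI_MAX_INDEXING_MEMORY", "2 GiB".to_string());
    // Second call is a no-op: the variable is now present.
    export_to_env_if_not_present("MEILI_MAX_INDEXING_MEMORY", "4 GiB".to_string());
    assert_eq!(env::var("MEILI_MAX_INDEXING_MEMORY").unwrap(), "2 GiB");
}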
crates/meilisearch/src/routes/chats/chat_completion_analytics.rs (new file, +135 lines)
@@ -0,0 +1,135 @@
+use std::collections::BinaryHeap;
+
+use serde_json::{json, Value};
+
+use crate::analytics::Aggregate;
+
+#[derive(Default)]
+pub struct ChatCompletionAggregator {
+    // requests
+    total_received: usize,
+    total_succeeded: usize,
+    time_spent: BinaryHeap<usize>,
+
+    // chat completion specific metrics
+    total_messages: usize,
+    total_streamed_requests: usize,
+    total_non_streamed_requests: usize,
+
+    // model usage tracking
+    models_used: std::collections::HashMap<String, usize>,
+}
+
+impl ChatCompletionAggregator {
+    pub fn from_request(model: &str, message_count: usize, is_stream: bool) -> Self {
+        let mut models_used = std::collections::HashMap::new();
+        models_used.insert(model.to_string(), 1);
+
+        Self {
+            total_received: 1,
+            total_succeeded: 0,
+            time_spent: BinaryHeap::new(),
+
+            total_messages: message_count,
+            total_streamed_requests: if is_stream { 1 } else { 0 },
+            total_non_streamed_requests: if is_stream { 0 } else { 1 },
+
+            models_used,
+        }
+    }
+
+    pub fn succeed(&mut self, time_spent: std::time::Duration) {
+        self.total_succeeded += 1;
+        self.time_spent.push(time_spent.as_millis() as usize);
+    }
+}
+
+impl Aggregate for ChatCompletionAggregator {
+    fn event_name(&self) -> &'static str {
+        "Chat Completion POST"
+    }
+
+    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
+        let Self {
+            total_received,
+            total_succeeded,
+            mut time_spent,
+            total_messages,
+            total_streamed_requests,
+            total_non_streamed_requests,
+            models_used,
+            ..
+        } = *new;
+
+        // Aggregate time spent
+        self.time_spent.append(&mut time_spent);
+
+        // Aggregate counters
+        self.total_received = self.total_received.saturating_add(total_received);
+        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
+        self.total_messages = self.total_messages.saturating_add(total_messages);
+        self.total_streamed_requests =
+            self.total_streamed_requests.saturating_add(total_streamed_requests);
+        self.total_non_streamed_requests =
+            self.total_non_streamed_requests.saturating_add(total_non_streamed_requests);
+
+        // Aggregate model usage
+        for (model, count) in models_used {
+            *self.models_used.entry(model).or_insert(0) += count;
+        }
+
+        self
+    }
+
+    fn into_event(self: Box<Self>) -> Value {
+        let Self {
+            total_received,
+            total_succeeded,
+            time_spent,
+            total_messages,
+            total_streamed_requests,
+            total_non_streamed_requests,
+            models_used,
+            ..
+        } = *self;
+
+        // Compute time statistics
+        let time_spent: Vec<usize> = time_spent.into_sorted_vec();
+        let (max_time, min_time, avg_time) = if time_spent.is_empty() {
+            (0, 0, 0)
+        } else {
+            let max_time = time_spent.last().unwrap_or(&0);
+            let min_time = time_spent.first().unwrap_or(&0);
+            let sum: usize = time_spent.iter().sum();
+            let avg_time = sum / time_spent.len();
+            (*max_time, *min_time, avg_time)
+        };
+
+        // Compute average messages per request
+        let avg_messages_per_request =
+            if total_received > 0 { total_messages as f64 / total_received as f64 } else { 0.0 };
+
+        // Compute streaming vs non-streaming proportions
+        let streaming_ratio = if total_received > 0 {
+            total_streamed_requests as f64 / total_received as f64
+        } else {
+            0.0
+        };
+
+        json!({
+            "total_received": total_received,
+            "total_succeeded": total_succeeded,
+            "time_spent": {
+                "max": max_time,
+                "min": min_time,
+                "avg": avg_time
+            },
+            "total_messages": total_messages,
+            "avg_messages_per_request": avg_messages_per_request,
+            "total_streamed_requests": total_streamed_requests,
+            "total_non_streamed_requests": total_non_streamed_requests,
+            "streaming_ratio": streaming_ratio,
+            "models_used": models_used,
+        })
+    }
+}
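The new aggregator stores request durations in a `BinaryHeap` so that merging two aggregates is a cheap `append`, and the sorted order is only materialized once, in `into_event`. A self-contained sketch of that min/max/avg computation:

use std::collections::BinaryHeap;

fn main() {
    let mut a: BinaryHeap<usize> = [120, 45, 300].into_iter().collect();
    let mut b: BinaryHeap<usize> = [80, 15].into_iter().collect();

    // Merging two aggregates: a single append, no re-sorting yet.
    a.append(&mut b);

    // Only when the event is serialized do we pay for the sort.
    let sorted: Vec<usize> = a.into_sorted_vec(); // ascending: [15, 45, 80, 120, 300]
    let min = *sorted.first().unwrap_or(&0);
    let max = *sorted.last().unwrap_or(&0);
    let avg = sorted.iter().sum::<usize>() / sorted.len().max(1);
    assert_eq!((min, max, avg), (15, 300, 112));
}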
@@ -13,9 +13,9 @@ use async_openai::types::{
     ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
     ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
     ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
-    ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
-    CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
-    FunctionCallStream, FunctionObjectArgs,
+    ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs,
+    ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
+    FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs,
 };
 use async_openai::Client;
 use bumpalo::Bump;
@@ -27,15 +27,17 @@ use meilisearch_types::features::{
     ChatCompletionPrompts as DbChatCompletionPrompts,
     ChatCompletionSource as DbChatCompletionSource, SystemRole,
 };
+use meilisearch_types::heed::RoTxn;
 use meilisearch_types::keys::actions;
 use meilisearch_types::milli::index::ChatConfig;
-use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
+use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
 use meilisearch_types::{Document, Index};
 use serde::Deserialize;
 use serde_json::json;
 use tokio::runtime::Handle;
 use tokio::sync::mpsc::error::SendError;

+use super::chat_completion_analytics::ChatCompletionAggregator;
 use super::config::Config;
 use super::errors::{MistralError, OpenAiOutsideError, StreamErrorEvent};
 use super::utils::format_documents;
@@ -43,10 +45,15 @@ use super::{
     ChatsParam, MEILI_APPEND_CONVERSATION_MESSAGE_NAME, MEILI_SEARCH_IN_INDEX_FUNCTION_NAME,
     MEILI_SEARCH_PROGRESS_NAME, MEILI_SEARCH_SOURCES_NAME,
 };
+use crate::analytics::Analytics;
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
-use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
+use crate::metrics::{
+    MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
+    MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
+    MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
+};
 use crate::routes::chats::utils::SseEventSender;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{add_search_rules, prepare_search, search_from_kind, SearchQuery};
@@ -64,6 +71,7 @@ async fn chat(
     req: HttpRequest,
     search_queue: web::Data<SearchQueue>,
     web::Json(chat_completion): web::Json<CreateChatCompletionRequest>,
+    analytics: web::Data<Analytics>,
 ) -> impl Responder {
     let ChatsParam { workspace_uid } = chats_param.into_inner();

@@ -76,6 +84,7 @@ async fn chat(
                 &workspace_uid,
                 req,
                 chat_completion,
+                analytics,
             )
             .await,
         )
@@ -88,6 +97,7 @@ async fn chat(
                 &workspace_uid,
                 req,
                 chat_completion,
+                analytics,
             )
             .await,
         )
@@ -160,6 +170,7 @@ fn setup_search_tool(

     let mut index_uids = Vec::new();
     let mut function_description = prompts.search_description.clone();
+    let mut filter_description = prompts.search_filter_param.clone();
     index_scheduler.try_for_each_index::<_, ()>(|name, index| {
         // Make sure to skip unauthorized indexes
         if !filters.is_index_authorized(name) {
@@ -171,16 +182,22 @@ fn setup_search_tool(
         let index_description = chat_config.description;
         let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
         index_uids.push(name.to_string());
+        let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
+        let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
+        let _ = writeln!(&mut filter_description, "{facet_distributions}");

         Ok(())
     })?;

+    tracing::debug!("LLM function description: {function_description}");
+    tracing::debug!("LLM filter description: {filter_description}");
+
     let tool = ChatCompletionToolArgs::default()
         .r#type(ChatCompletionToolType::Function)
         .function(
             FunctionObjectArgs::default()
                 .name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
-                .description(&function_description)
+                .description(function_description)
                 .parameters(json!({
                     "type": "object",
                     "properties": {
@@ -194,9 +211,13 @@ fn setup_search_tool(
                             // "type": ["string", "null"],
                             "type": "string",
                             "description": prompts.search_q_param,
+                        },
+                        "filter": {
+                            "type": "string",
+                            "description": filter_description,
                         }
                     },
-                    "required": ["index_uid", "q"],
+                    "required": ["index_uid", "q", "filter"],
                     "additionalProperties": false,
                 }))
                 .strict(true)
@@ -238,11 +259,19 @@ async fn process_search_request(
     auth_token: &str,
     index_uid: String,
     q: Option<String>,
+    filter: Option<String>,
 ) -> Result<(Index, Vec<Document>, String), ResponseError> {
     let index = index_scheduler.index(&index_uid)?;
     let rtxn = index.static_read_txn()?;
     let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
-    let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
+    let mut query = SearchQuery {
+        q,
+        filter: filter.map(serde_json::Value::from),
+        ..SearchQuery::from(search_parameters)
+    };
+
+    tracing::debug!("LLM query: {:?}", query);

     let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
         auth_ctrl,
         auth_token,
@@ -271,17 +300,26 @@ async fn process_search_request(
         let (search, _is_finite_pagination, _max_total_hits, _offset) =
             prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;

-        search_from_kind(index_uid, search_kind, search)
-            .map(|(search_results, _)| (rtxn, search_results))
-            .map_err(ResponseError::from)
+        match search_from_kind(index_uid, search_kind, search) {
+            Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
+            Err(MeilisearchHttpError::Milli {
+                error: meilisearch_types::milli::Error::UserError(user_error),
+                index_name: _,
+            }) => Ok((rtxn, Err(user_error))),
+            Err(err) => Err(ResponseError::from(err)),
+        }
     })
     .await;
     permit.drop().await;

-    let output = output?;
+    let output = match output? {
+        Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
+        Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
+        Err(err) => Err(err),
+    };
     let mut documents = Vec::new();
     if let Ok((ref rtxn, ref search_result)) = output {
-        // aggregate.succeed(search_result);
+        MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
         if search_result.degraded {
             MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
         }
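The reworked `process_search_request` above distinguishes two failure layers: internal errors still fail the request, while milli user errors (for example an invalid LLM-generated filter) are returned as plain text for the model to read. A minimal sketch of that nested-`Result` pattern with stand-in types (not the real milli error enum):

#[derive(Debug)]
struct UserError(String);

// Outer Result: internal failures. Inner Result: user errors that should
// become tool output instead of failing the whole HTTP request.
fn search() -> Result<Result<Vec<String>, UserError>, std::io::Error> {
    Ok(Err(UserError("invalid filter syntax".into())))
}

fn handle() -> Result<(Vec<String>, String), std::io::Error> {
    match search()? {
        Ok(hits) => Ok((hits, String::new())),
        // A user error becomes the tool's textual answer.
        Err(err) => Ok((Vec::new(), format!("{err:?}"))),
    }
}

fn main() {
    let (hits, message) = handle().unwrap();
    assert!(hits.is_empty());
    println!("tool output: {message}");
}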
@@ -315,9 +353,18 @@ async fn non_streamed_chat(
     workspace_uid: &str,
     req: HttpRequest,
     chat_completion: CreateChatCompletionRequest,
+    analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;

+    // Create analytics aggregator
+    let aggregate = ChatCompletionAggregator::from_request(
+        &chat_completion.model,
+        chat_completion.messages.len(),
+        false, // non_streamed_chat is not streaming
+    );
+    let start_time = std::time::Instant::now();
+
     if let Some(n) = chat_completion.n.filter(|&n| n != 1) {
         return Err(ResponseError::from_msg(
             format!("You tried to specify n = {n} but only single choices are supported (n = 1)."),
@@ -377,16 +424,19 @@ async fn non_streamed_chat(

     for call in meili_calls {
         let result = match serde_json::from_str(&call.function.arguments) {
-            Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
-                &index_scheduler,
-                auth_ctrl.clone(),
-                &search_queue,
-                auth_token,
-                index_uid,
-                q,
-            )
-            .await
-            .map_err(|e| e.to_string()),
+            Ok(SearchInIndexParameters { index_uid, q, filter }) => {
+                process_search_request(
+                    &index_scheduler,
+                    auth_ctrl.clone(),
+                    &search_queue,
+                    auth_token,
+                    index_uid,
+                    q,
+                    filter,
+                )
+                .await
+                .map_err(|e| e.to_string())
+            }
             Err(err) => Err(err.to_string()),
         };

@@ -414,6 +464,11 @@ async fn non_streamed_chat(
         }
     }

+    // Record success in analytics
+    let mut aggregate = aggregate;
+    aggregate.succeed(start_time.elapsed());
+    analytics.publish(aggregate, &req);
+
     Ok(HttpResponse::Ok().json(response))
 }
@@ -424,6 +479,7 @@ async fn streamed_chat(
     workspace_uid: &str,
     req: HttpRequest,
     mut chat_completion: CreateChatCompletionRequest,
+    analytics: web::Data<Analytics>,
 ) -> Result<impl Responder, ResponseError> {
     index_scheduler.features().check_chat_completions("using the /chats chat completions route")?;
     let filters = index_scheduler.filters();
@@ -445,6 +501,14 @@ async fn streamed_chat(
         }
     };

+    // Create analytics aggregator
+    let mut aggregate = ChatCompletionAggregator::from_request(
+        &chat_completion.model,
+        chat_completion.messages.len(),
+        true, // streamed_chat is always streaming
+    );
+    let start_time = std::time::Instant::now();
+
     let config = Config::new(&chat_settings);
     let auth_token = extract_token_from_request(&req)?.unwrap().to_string();
     let system_role = chat_settings.source.system_role(&chat_completion.model);
@@ -460,6 +524,7 @@ async fn streamed_chat(

     let (tx, rx) = tokio::sync::mpsc::channel(10);
     let tx = SseEventSender::new(tx);
+    let workspace_uid = workspace_uid.to_string();
     let _join_handle = Handle::current().spawn(async move {
         let client = Client::with_config(config.clone());
         let mut global_tool_calls = HashMap::<u32, Call>::new();
@@ -469,6 +534,7 @@ async fn streamed_chat(
             let output = run_conversation(
                 &index_scheduler,
                 &auth_ctrl,
+                &workspace_uid,
                 &search_queue,
                 &auth_token,
                 &client,
@@ -490,6 +556,10 @@ async fn streamed_chat(
         let _ = tx.stop().await;
     });

+    // Record success in analytics after the stream is set up
+    aggregate.succeed(start_time.elapsed());
+    analytics.publish(aggregate, &req);
+
     Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10)))
 }
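The streaming route above fans events out through a bounded tokio channel: a spawned task produces SSE events while the receiver is handed to `Sse::from_infallible_receiver`. A minimal sketch of that channel shape (pure tokio, without the actix SSE wrapper):

use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Same capacity as the route above: buffer at most 10 in-flight events.
    let (tx, mut rx) = mpsc::channel::<String>(10);

    tokio::spawn(async move {
        for chunk in ["Hello", " ", "world"] {
            // `send` awaits when the buffer is full, applying backpressure
            // to the producer instead of growing memory unboundedly.
            if tx.send(chunk.to_string()).await.is_err() {
                break; // receiver dropped, e.g. client disconnected
            }
        }
        // Dropping `tx` closes the stream for the receiver.
    });

    while let Some(event) = rx.recv().await {
        print!("{event}");
    }
    println!();
}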
@@ -502,6 +572,7 @@ async fn run_conversation<C: async_openai::config::Config>(
         Data<IndexScheduler>,
     >,
     auth_ctrl: &web::Data<AuthController>,
+    workspace_uid: &str,
     search_queue: &web::Data<SearchQueue>,
     auth_token: &str,
     client: &Client<C>,
@@ -511,13 +582,34 @@ async fn run_conversation<C: async_openai::config::Config>(
     global_tool_calls: &mut HashMap<u32, Call>,
     function_support: FunctionSupport,
 ) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
+    use DbChatCompletionSource::*;
+
     let mut finish_reason = None;
+    chat_completion.stream_options = match source {
+        OpenAi | AzureOpenAi => Some(ChatCompletionStreamOptions { include_usage: true }),
+        Mistral | VLlm => None,
+    };
+
     // safety: unwrap: can only happens if `stream` was set to `false`
     let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
     while let Some(result) = response.next().await {
         match result {
             Ok(resp) => {
-                let choice = &resp.choices[0];
+                if let Some(usage) = resp.usage.as_ref() {
+                    MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
+                        .with_label_values(&[workspace_uid, &chat_completion.model])
+                        .inc_by(usage.prompt_tokens as u64);
+                    MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
+                        .with_label_values(&[workspace_uid, &chat_completion.model])
+                        .inc_by(usage.completion_tokens as u64);
+                    MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
+                        .with_label_values(&[workspace_uid, &chat_completion.model])
+                        .inc_by(usage.total_tokens as u64);
+                }
+                let choice = match resp.choices.first() {
+                    Some(choice) => choice,
+                    None => break,
+                };
                 finish_reason = choice.finish_reason;

                 let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
@@ -659,13 +751,14 @@ async fn handle_meili_tools(
     let mut error = None;

     let result = match serde_json::from_str(&call.function.arguments) {
-        Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
+        Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
             index_scheduler,
             auth_ctrl.clone(),
             search_queue,
             auth_token,
             index_uid,
             q,
+            filter,
         )
         .await
         {
@@ -741,4 +834,42 @@ struct SearchInIndexParameters {
     index_uid: String,
     /// The query parameter to use.
     q: Option<String>,
+    /// The filter parameter to use.
+    filter: Option<String>,
+}
+
+fn format_facet_distributions(
+    index: &Index,
+    rtxn: &RoTxn,
+    max_values_per_facet: usize,
+) -> meilisearch_types::milli::Result<String> {
+    let universe = index.documents_ids(rtxn)?;
+    let rules = index.filterable_attributes_rules(rtxn)?;
+    let fields_ids_map = index.fields_ids_map(rtxn)?;
+    let filterable_attributes = fields_ids_map
+        .names()
+        .filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match)))
+        .map(|name| (name, OrderBy::Count));
+    let facets_distribution = index
+        .facets_distribution(rtxn)
+        .max_values_per_facet(max_values_per_facet)
+        .candidates(universe)
+        .facets(filterable_attributes)
+        .execute()?;
+
+    let mut output = String::new();
+    for (facet_name, entries) in facets_distribution {
+        let _ = write!(&mut output, "{}: ", facet_name);
+        let total_entries = entries.len();
+        for (i, (value, _count)) in entries.into_iter().enumerate() {
+            let _ = if total_entries.saturating_sub(1) == i {
+                write!(&mut output, "{value}.")
+            } else {
+                write!(&mut output, "{value}, ")
+            };
+        }
+        let _ = writeln!(&mut output);
+    }
+
+    Ok(output)
 }
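`format_facet_distributions` above renders each facet as one comma-separated line ending in a period, which is what gets embedded into the tool's `filter` description. A self-contained sketch of that formatting with hypothetical facet data (not the milli facet API):

use std::collections::BTreeMap;
use std::fmt::Write;

fn main() {
    // Hypothetical facet distribution: facet name -> (value, count) pairs.
    let mut facets: BTreeMap<&str, Vec<(&str, u64)>> = BTreeMap::new();
    facets.insert("genre", vec![("drama", 42), ("comedy", 30), ("sci-fi", 12)]);
    facets.insert("year", vec![("2023", 20), ("2024", 18)]);

    let mut output = String::new();
    for (facet_name, entries) in facets {
        let _ = write!(&mut output, "{facet_name}: ");
        let total = entries.len();
        for (i, (value, _count)) in entries.into_iter().enumerate() {
            // The last value ends the line with a period, like the diff above.
            let _ = if i == total.saturating_sub(1) {
                write!(&mut output, "{value}.")
            } else {
                write!(&mut output, "{value}, ")
            };
        }
        let _ = writeln!(&mut output);
    }
    print!("{output}");
    // genre: drama, comedy, sci-fi.
    // year: 2023, 2024.
}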
@@ -13,7 +13,7 @@ impl Config {
     pub fn new(chat_settings: &DbChatSettings) -> Self {
         use meilisearch_types::features::ChatCompletionSource::*;
         match chat_settings.source {
-            OpenAi | Mistral | Gemini | VLlm => {
+            OpenAi | Mistral | VLlm => {
                 let mut config = OpenAIConfig::default();
                 if let Some(org_id) = chat_settings.org_id.as_ref() {
                     config = config.with_org_id(org_id);
@@ -19,6 +19,7 @@ use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::GuardedData;
 use crate::routes::PAGINATION_DEFAULT_LIMIT;

+mod chat_completion_analytics;
 pub mod chat_completions;
 mod config;
 mod errors;
@@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::features::{
     ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
     ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
-    DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
-    DEFAULT_CHAT_SYSTEM_PROMPT,
+    DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
+    DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
 };
 use meilisearch_types::keys::actions;
 use meilisearch_types::milli::update::Setting;

@@ -84,6 +84,11 @@ async fn patch_settings(
                 Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
                 Setting::NotSet => old_settings.prompts.search_q_param,
             },
+            search_filter_param: match new_prompts.search_filter_param {
+                Setting::Set(new_description) => new_description,
+                Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
+                Setting::NotSet => old_settings.prompts.search_filter_param,
+            },
             search_index_uid_param: match new_prompts.search_index_uid_param {
                 Setting::Set(new_description) => new_description,
                 Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),

@@ -218,7 +223,6 @@ pub enum ChatCompletionSource {
     #[default]
     OpenAi,
     Mistral,
-    Gemini,
     AzureOpenAi,
     VLlm,
 }

@@ -229,7 +233,6 @@ impl From<ChatCompletionSource> for DbChatCompletionSource {
         match source {
             OpenAi => DbChatCompletionSource::OpenAi,
             Mistral => DbChatCompletionSource::Mistral,
-            Gemini => DbChatCompletionSource::Gemini,
             AzureOpenAi => DbChatCompletionSource::AzureOpenAi,
             VLlm => DbChatCompletionSource::VLlm,
         }

@@ -254,6 +257,10 @@ pub struct ChatPrompts {
     #[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
     pub search_q_param: Setting<String>,
     #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
+    #[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
+    pub search_filter_param: Setting<String>,
+    #[serde(default)]
     #[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
     #[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
     pub search_index_uid_param: Setting<String>,
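The PATCH handler above folds each incoming prompt into the stored settings with the same three-way match. A minimal, self-contained sketch of that merge rule (the `Setting` enum below is a stand-in for the real one in `meilisearch_types`, and the names are illustrative):

```rust
// Stand-in for the three-state setting used by the PATCH handler above.
enum Setting<T> {
    Set(T),  // the client sent a new value
    Reset,   // the client asked to go back to the default
    NotSet,  // the field was absent from the payload
}

// Merge one incoming setting with the stored value, falling back to a default.
fn merge(new: Setting<String>, old: String, default: &str) -> String {
    match new {
        Setting::Set(value) => value,
        Setting::Reset => default.to_string(),
        Setting::NotSet => old,
    }
}

fn main() {
    let old = "previous prompt".to_string();
    assert_eq!(merge(Setting::NotSet, old.clone(), "default"), "previous prompt");
    assert_eq!(merge(Setting::Reset, old.clone(), "default"), "default");
    assert_eq!(merge(Setting::Set("new".into()), old, "default"), "new");
}
```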
crates/meilisearch/src/routes/export.rs (new file, +183 lines)
@@ -0,0 +1,183 @@
+use std::collections::BTreeMap;
+use std::convert::Infallible;
+use std::str::FromStr as _;
+
+use actix_web::web::{self, Data};
+use actix_web::{HttpRequest, HttpResponse};
+use byte_unit::Byte;
+use deserr::actix_web::AwebJson;
+use deserr::Deserr;
+use index_scheduler::IndexScheduler;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::index_uid_pattern::IndexUidPattern;
+use meilisearch_types::keys::actions;
+use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent};
+use serde::Serialize;
+use serde_json::Value;
+use tracing::debug;
+use utoipa::{OpenApi, ToSchema};
+
+use crate::analytics::Analytics;
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::GuardedData;
+use crate::routes::export_analytics::ExportAnalytics;
+use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
+use crate::Opt;
+
+#[derive(OpenApi)]
+#[openapi(
+    paths(export),
+    tags((
+        name = "Export",
+        description = "The `/export` route allows you to trigger an export process to a remote Meilisearch instance.",
+        external_docs(url = "https://www.meilisearch.com/docs/reference/api/export"),
+    )),
+)]
+pub struct ExportApi;
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(web::resource("").route(web::post().to(export)));
+}
+
+#[utoipa::path(
+    post,
+    path = "",
+    tag = "Export",
+    security(("Bearer" = ["export", "*"])),
+    responses(
+        (status = 202, description = "Export successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
+            {
+                "taskUid": 1,
+                "status": "enqueued",
+                "type": "export",
+                "enqueuedAt": "2021-08-11T09:25:53.000000Z"
+            })),
+        (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
+            {
+                "message": "The Authorization header is missing. It must use the bearer authorization method.",
+                "code": "missing_authorization_header",
+                "type": "auth",
+                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
+            }
+        )),
+    )
+)]
+async fn export(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::EXPORT }>, Data<IndexScheduler>>,
+    export: AwebJson<Export, DeserrJsonError>,
+    req: HttpRequest,
+    opt: web::Data<Opt>,
+    analytics: Data<Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let export = export.into_inner();
+    debug!(returns = ?export, "Trigger export");
+
+    let analytics_aggregate = ExportAnalytics::from_export(&export);
+
+    let Export { url, api_key, payload_size, indexes } = export;
+
+    let indexes = match indexes {
+        Some(indexes) => indexes
+            .into_iter()
+            .map(|(pattern, ExportIndexSettings { filter, override_settings })| {
+                (pattern, DbExportIndexSettings { filter, override_settings })
+            })
+            .collect(),
+        None => BTreeMap::from([(
+            IndexUidPattern::new_unchecked("*"),
+            DbExportIndexSettings::default(),
+        )]),
+    };
+
+    let task = KindWithContent::Export {
+        url,
+        api_key,
+        payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
+        indexes,
+    };
+    let uid = get_task_id(&req, &opt)?;
+    let dry_run = is_dry_run(&req, &opt)?;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
+            .await??
+            .into();
+
+    analytics.publish(analytics_aggregate, &req);
+
+    Ok(HttpResponse::Ok().json(task))
+}
+
+#[derive(Debug, Deserr, ToSchema, Serialize)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct Export {
+    #[schema(value_type = Option<String>, example = json!("https://ms-1234.heaven.meilisearch.com"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidExportUrl>)]
+    pub url: String,
+    #[schema(value_type = Option<String>, example = json!("1234abcd"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidExportApiKey>)]
+    pub api_key: Option<String>,
+    #[schema(value_type = Option<String>, example = json!("24MiB"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidExportPayloadSize>)]
+    pub payload_size: Option<ByteWithDeserr>,
+    #[schema(value_type = Option<BTreeMap<String, ExportIndexSettings>>, example = json!({ "*": { "filter": null } }))]
+    #[deserr(default)]
+    #[serde(default)]
+    pub indexes: Option<BTreeMap<IndexUidPattern, ExportIndexSettings>>,
+}
+
+/// A wrapper around the `Byte` type that implements `Deserr`.
+#[derive(Debug, Serialize)]
+#[serde(transparent)]
+pub struct ByteWithDeserr(pub Byte);
+
+impl<E> deserr::Deserr<E> for ByteWithDeserr
+where
+    E: deserr::DeserializeError,
+{
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: deserr::ValuePointerRef,
+    ) -> Result<Self, E> {
+        use deserr::{ErrorKind, Value, ValueKind};
+        match value {
+            Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
+            Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
+                deserr::take_cf_content(E::error::<Infallible>(
+                    None,
+                    ErrorKind::Unexpected { msg: e.to_string() },
+                    location,
+                ))
+            }),
+            actual => Err(deserr::take_cf_content(E::error(
+                None,
+                ErrorKind::IncorrectValueKind {
+                    actual,
+                    accepted: &[ValueKind::Integer, ValueKind::String],
+                },
+                location,
+            ))),
+        }
+    }
+}
+
+#[derive(Debug, Deserr, ToSchema, Serialize)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+#[serde(rename_all = "camelCase")]
+#[schema(rename_all = "camelCase")]
+pub struct ExportIndexSettings {
+    #[schema(value_type = Option<String>, example = json!("genres = action"))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)]
+    pub filter: Option<Value>,
+    #[schema(value_type = Option<bool>, example = json!(true))]
+    #[serde(default)]
+    #[deserr(default, error = DeserrJsonError<InvalidExportIndexOverrideSettings>)]
+    pub override_settings: bool,
+}
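For readers tracing the new route: judging from the `Export` struct and its deserr/serde attributes above, a request body for `POST /export` could look like the following sketch. The URL, key, and index pattern are placeholders, not a real deployment.

```rust
use serde_json::json;

fn main() {
    // Hypothetical body for `POST /export`, using the camelCase names the
    // `Export` struct above declares. `payloadSize` also accepts a plain
    // integer byte count, per the `ByteWithDeserr` implementation.
    let body = json!({
        "url": "https://ms-1234.heaven.meilisearch.com",
        "apiKey": "1234abcd",
        "payloadSize": "24MiB",
        "indexes": {
            "movies-*": { "filter": "genres = action", "overrideSettings": true }
        }
    });
    println!("{body:#}");
}
```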
crates/meilisearch/src/routes/export_analytics.rs (new file, +111 lines)
@@ -0,0 +1,111 @@
+use url::Url;
+
+use crate::analytics::Aggregate;
+use crate::routes::export::Export;
+
+#[derive(Default)]
+pub struct ExportAnalytics {
+    total_received: usize,
+    has_api_key: bool,
+    sum_exports_meilisearch_cloud: usize,
+    sum_index_patterns: usize,
+    sum_patterns_with_filter: usize,
+    sum_patterns_with_override_settings: usize,
+    payload_sizes: Vec<u64>,
+}
+
+impl ExportAnalytics {
+    pub fn from_export(export: &Export) -> Self {
+        let Export { url, api_key, payload_size, indexes } = export;
+
+        let url = Url::parse(url).ok();
+        let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
+            host.ends_with("meilisearch.dev")
+                || host.ends_with("meilisearch.com")
+                || host.ends_with("meilisearch.io")
+        });
+        let has_api_key = api_key.is_some();
+        let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
+        let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
+            indexes.values().filter(|settings| settings.filter.is_some()).count()
+        });
+        let patterns_with_override_settings_count = indexes.as_ref().map_or(0, |indexes| {
+            indexes.values().filter(|settings| settings.override_settings).count()
+        });
+        let payload_sizes =
+            if let Some(crate::routes::export::ByteWithDeserr(byte_size)) = payload_size {
+                vec![byte_size.as_u64()]
+            } else {
+                vec![]
+            };
+
+        Self {
+            total_received: 1,
+            has_api_key,
+            sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
+            sum_index_patterns: index_patterns_count,
+            sum_patterns_with_filter: patterns_with_filter_count,
+            sum_patterns_with_override_settings: patterns_with_override_settings_count,
+            payload_sizes,
+        }
+    }
+}
+
+impl Aggregate for ExportAnalytics {
+    fn event_name(&self) -> &'static str {
+        "Export Triggered"
+    }
+
+    fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
+        self.total_received += other.total_received;
+        self.has_api_key |= other.has_api_key;
+        self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
+        self.sum_index_patterns += other.sum_index_patterns;
+        self.sum_patterns_with_filter += other.sum_patterns_with_filter;
+        self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
+        self.payload_sizes.extend(other.payload_sizes);
+        self
+    }
+
+    fn into_event(self: Box<Self>) -> serde_json::Value {
+        let avg_payload_size = if self.payload_sizes.is_empty() {
+            None
+        } else {
+            Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
+        };
+
+        let avg_exports_meilisearch_cloud = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_exports_meilisearch_cloud as f64 / self.total_received as f64)
+        };
+
+        let avg_index_patterns = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_index_patterns as f64 / self.total_received as f64)
+        };
+
+        let avg_patterns_with_filter = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_patterns_with_filter as f64 / self.total_received as f64)
+        };
+
+        let avg_patterns_with_override_settings = if self.total_received == 0 {
+            None
+        } else {
+            Some(self.sum_patterns_with_override_settings as f64 / self.total_received as f64)
+        };
+
+        serde_json::json!({
+            "total_received": self.total_received,
+            "has_api_key": self.has_api_key,
+            "avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
+            "avg_index_patterns": avg_index_patterns,
+            "avg_patterns_with_filter": avg_patterns_with_filter,
+            "avg_patterns_with_override_settings": avg_patterns_with_override_settings,
+            "avg_payload_size": avg_payload_size,
+        })
+    }
+}
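The `into_event` implementation above divides each running sum by `total_received`, publishing `null` until at least one event has been aggregated. The guard, as a self-contained sketch:

```rust
// Average of a running sum over a count, as in `into_event` above:
// `None` (serialized as `null`) when nothing was aggregated yet.
fn average(sum: usize, count: usize) -> Option<f64> {
    if count == 0 {
        None
    } else {
        Some(sum as f64 / count as f64)
    }
}

fn main() {
    assert_eq!(average(3, 0), None);       // no events: avoid dividing by zero
    assert_eq!(average(3, 4), Some(0.75)); // e.g. 3 of 4 exports hit the cloud hosts
}
```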
@@ -54,6 +54,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             get_task_documents_route: Some(false),
             composite_embedders: Some(false),
             chat_completions: Some(false),
+            multimodal: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {

@@ -100,6 +101,8 @@ pub struct RuntimeTogglableFeatures {
     pub composite_embedders: Option<bool>,
     #[deserr(default)]
     pub chat_completions: Option<bool>,
+    #[deserr(default)]
+    pub multimodal: Option<bool>,
 }

 impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {

@@ -113,6 +116,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
             get_task_documents_route,
             composite_embedders,
             chat_completions,
+            multimodal,
         } = value;

         Self {

@@ -124,6 +128,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
             get_task_documents_route: Some(get_task_documents_route),
             composite_embedders: Some(composite_embedders),
             chat_completions: Some(chat_completions),
+            multimodal: Some(multimodal),
         }
     }
 }

@@ -138,6 +143,7 @@ pub struct PatchExperimentalFeatureAnalytics {
     get_task_documents_route: bool,
     composite_embedders: bool,
     chat_completions: bool,
+    multimodal: bool,
 }

 impl Aggregate for PatchExperimentalFeatureAnalytics {

@@ -155,6 +161,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             get_task_documents_route: new.get_task_documents_route,
             composite_embedders: new.composite_embedders,
             chat_completions: new.chat_completions,
+            multimodal: new.multimodal,
         })
     }

@@ -181,6 +188,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             get_task_documents_route: Some(false),
             composite_embedders: Some(false),
             chat_completions: Some(false),
+            multimodal: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {

@@ -223,6 +231,7 @@ async fn patch_features(
             .composite_embedders
             .unwrap_or(old_features.composite_embedders),
         chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
+        multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
     };

     // explicitly destructure for analytics rather than using the `Serialize` implementation, because

@@ -237,6 +246,7 @@ async fn patch_features(
         get_task_documents_route,
         composite_embedders,
         chat_completions,
+        multimodal,
     } = new_features;

     analytics.publish(

@@ -249,6 +259,7 @@ async fn patch_features(
             get_task_documents_route,
             composite_embedders,
             chat_completions,
+            multimodal,
         },
         &req,
     );
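Each hunk above threads the new `multimodal` toggle through the same places the existing flags already pass, and `patch_features` merges it with the usual partial-update rule: a field absent from the payload keeps its stored value. A minimal sketch of that rule, with an invented subset of the fields:

```rust
// Partial-update semantics used by the PATCH handler above: `None` means
// "not mentioned in the payload", so the old value is kept.
#[derive(Debug, PartialEq)]
struct Features {
    chat_completions: bool,
    multimodal: bool,
}

struct Patch {
    chat_completions: Option<bool>,
    multimodal: Option<bool>,
}

fn apply(old: Features, new: Patch) -> Features {
    Features {
        chat_completions: new.chat_completions.unwrap_or(old.chat_completions),
        multimodal: new.multimodal.unwrap_or(old.multimodal),
    }
}

fn main() {
    let old = Features { chat_completions: true, multimodal: false };
    let patched = apply(old, Patch { chat_completions: None, multimodal: Some(true) });
    assert_eq!(patched, Features { chat_completions: true, multimodal: true });
}
```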
@@ -1,6 +1,7 @@
 use std::collections::HashSet;
 use std::io::{ErrorKind, Seek as _};
 use std::marker::PhantomData;
+use std::str::FromStr;

 use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;

@@ -17,9 +18,11 @@ use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::documents::sort::recursive_sort;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
-use meilisearch_types::milli::DocumentId;
+use meilisearch_types::milli::{AscDesc, DocumentId};
 use meilisearch_types::serde_cs::vec::CS;
 use meilisearch_types::star_or::OptionStarOrList;
 use meilisearch_types::tasks::KindWithContent;

@@ -42,6 +45,7 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::payload::Payload;
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::indexes::search::fix_sort_query_parameters;
 use crate::routes::{
     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
 };

@@ -135,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
     per_document_id: bool,
     // if a filter was used
     per_filter: bool,
+    // if documents were sorted
+    sort: bool,

     #[serde(rename = "vector.retrieve_vectors")]
     retrieve_vectors: bool,

@@ -151,39 +157,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
     marker: std::marker::PhantomData<Method>,
 }

-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
-}
-
-impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
-    pub fn from_query(query: &DocumentFetchKind) -> Self {
-        let (limit, offset, retrieve_vectors) = match query {
-            DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
-            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
-                (*limit, *offset, *retrieve_vectors)
-            }
-        };
-
-        let ids = match query {
-            DocumentFetchKind::Normal { ids, .. } => *ids,
-            DocumentFetchKind::PerDocumentId { .. } => 0,
-        };
-
-        Self {
-            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
-            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
-            max_limit: limit,
-            max_offset: offset,
-            retrieve_vectors,
-            max_document_ids: ids,
-
-            marker: PhantomData,
-        }
-    }
-}
-
 impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
     fn event_name(&self) -> &'static str {
         Method::event_name()

@@ -193,6 +166,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
         Box::new(Self {
             per_document_id: self.per_document_id | new.per_document_id,
             per_filter: self.per_filter | new.per_filter,
+            sort: self.sort | new.sort,
             retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
             max_limit: self.max_limit.max(new.max_limit),
             max_offset: self.max_offset.max(new.max_offset),

@@ -276,6 +250,7 @@ pub async fn get_document(
             retrieve_vectors: param_retrieve_vectors.0,
             per_document_id: true,
             per_filter: false,
+            sort: false,
             max_limit: 0,
             max_offset: 0,
             max_document_ids: 0,

@@ -406,6 +381,8 @@ pub struct BrowseQueryGet {
     #[param(default, value_type = Option<String>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
     filter: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
+    sort: Option<String>,
 }

 #[derive(Debug, Deserr, ToSchema)]

@@ -430,6 +407,9 @@ pub struct BrowseQuery {
     #[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
     filter: Option<Value>,
+    #[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
+    sort: Option<Vec<String>>,
 }

 /// Get documents with POST

@@ -495,6 +475,7 @@ pub async fn documents_by_query_post(
     analytics.publish(
         DocumentsFetchAggregator::<DocumentsPOST> {
             per_filter: body.filter.is_some(),
+            sort: body.sort.is_some(),
             retrieve_vectors: body.retrieve_vectors,
             max_limit: body.limit,
             max_offset: body.offset,

@@ -571,7 +552,7 @@ pub async fn get_documents(
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Get documents GET");

-    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
+    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
         params.into_inner();

     let filter = match filter {

@@ -582,20 +563,20 @@ pub async fn get_documents(
         None => None,
     };

-    let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
-
     let query = BrowseQuery {
         offset: offset.0,
         limit: limit.0,
         fields: fields.merge_star_and_none(),
         retrieve_vectors: retrieve_vectors.0,
         filter,
-        ids,
+        ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
+        sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
     };

     analytics.publish(
         DocumentsFetchAggregator::<DocumentsGET> {
             per_filter: query.filter.is_some(),
+            sort: query.sort.is_some(),
             retrieve_vectors: query.retrieve_vectors,
             max_limit: query.limit,
             max_offset: query.offset,

@@ -615,7 +596,7 @@ fn documents_by_query(
     query: BrowseQuery,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
+    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;

     let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);

@@ -633,6 +614,18 @@ fn documents_by_query(
         None
     };

+    let sort_criteria = if let Some(sort) = &sort {
+        let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
+            Ok(sorts) => sorts,
+            Err(asc_desc_error) => {
+                return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
+            }
+        };
+        Some(sorts)
+    } else {
+        None
+    };
+
     let index = index_scheduler.index(&index_uid)?;
     let (total, documents) = retrieve_documents(
         &index,

@@ -643,6 +636,7 @@ fn documents_by_query(
         fields,
         retrieve_vectors,
         index_scheduler.features(),
+        sort_criteria,
     )?;

     let ret = PaginationView::new(offset, limit, total as usize, documents);

@@ -1452,7 +1446,6 @@ fn some_documents<'a, 't: 'a>(
 ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-    let embedding_configs = index.embedding_configs(rtxn)?;

     Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
         ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {

@@ -1468,15 +1461,13 @@ fn some_documents<'a, 't: 'a>(
                     Some(Value::Object(map)) => map,
                     _ => Default::default(),
                 };
-                for (name, vector) in index.embeddings(rtxn, key)? {
-                    let user_provided = embedding_configs
-                        .iter()
-                        .find(|conf| conf.name == name)
-                        .is_some_and(|conf| conf.user_provided.contains(key));
-                    let embeddings = ExplicitVectors {
-                        embeddings: Some(vector.into()),
-                        regenerate: !user_provided,
-                    };
+                for (
+                    name,
+                    EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
+                ) in index.embeddings(rtxn, key)?
+                {
+                    let embeddings =
+                        ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
                     vectors.insert(
                         name,
                         serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,

@@ -1501,6 +1492,7 @@ fn retrieve_documents<S: AsRef<str>>(
     attributes_to_retrieve: Option<Vec<S>>,
     retrieve_vectors: RetrieveVectors,
     features: RoFeatures,
+    sort_criteria: Option<Vec<AscDesc>>,
 ) -> Result<(u64, Vec<Document>), ResponseError> {
     let rtxn = index.read_txn()?;
     let filter = &filter;

@@ -1533,15 +1525,32 @@ fn retrieve_documents<S: AsRef<str>>(
         })?
     }

-    let (it, number_of_documents) = {
+    let (it, number_of_documents) = if let Some(sort) = sort_criteria {
+        let number_of_documents = candidates.len();
+        let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
+        let iter = facet_sort.iter()?;
+        let mut documents = Vec::with_capacity(limit);
+        for result in iter.skip(offset).take(limit) {
+            documents.push(result?);
+        }
+        (
+            itertools::Either::Left(some_documents(
+                index,
+                &rtxn,
+                documents.into_iter(),
+                retrieve_vectors,
+            )?),
+            number_of_documents,
+        )
+    } else {
         let number_of_documents = candidates.len();
         (
-            some_documents(
+            itertools::Either::Right(some_documents(
                 index,
                 &rtxn,
                 candidates.into_iter().skip(offset).take(limit),
                 retrieve_vectors,
-            )?,
+            )?),
             number_of_documents,
         )
     };
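The new `sort` parameter is parsed with `AscDesc::from_str`, i.e. the `field:asc` / `field:desc` convention the search route already uses. Purely as an illustration of that syntax (not milli's actual parser):

```rust
// Illustrative parser for the `field:asc` / `field:desc` sort expressions the
// new `sort` parameter accepts; milli's `AscDesc::from_str` is the real one.
#[derive(Debug, PartialEq)]
enum Direction {
    Asc,
    Desc,
}

fn parse_sort(s: &str) -> Result<(String, Direction), String> {
    match s.rsplit_once(':') {
        Some((field, "asc")) => Ok((field.to_string(), Direction::Asc)),
        Some((field, "desc")) => Ok((field.to_string(), Direction::Desc)),
        _ => Err(format!("invalid sort expression: {s}")),
    }
}

fn main() {
    assert_eq!(parse_sort("title:asc"), Ok(("title".to_string(), Direction::Asc)));
    assert!(parse_sort("title").is_err()); // a direction is required in this sketch
}
```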
@@ -1,242 +0,0 @@
-use actix_web::web::{self, Data};
-use actix_web::{HttpRequest, HttpResponse};
-use deserr::actix_web::{AwebJson, AwebQueryParameter};
-use index_scheduler::IndexScheduler;
-use meilisearch_types::deserr::query_params::Param;
-use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
-use meilisearch_types::error::deserr_codes::*;
-use meilisearch_types::error::ResponseError;
-use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::keys::actions;
-use meilisearch_types::serde_cs::vec::CS;
-use serde_json::Value;
-use tracing::debug;
-use utoipa::{IntoParams, OpenApi};
-
-use super::ActionPolicy;
-use crate::analytics::Analytics;
-use crate::extractors::authentication::GuardedData;
-use crate::extractors::sequential_extractor::SeqHandler;
-use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
-use crate::search::{
-    add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, Route,
-    SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
-};
-
-#[derive(OpenApi)]
-#[openapi(
-    paths(similar_get, similar_post),
-    tags(
-        (
-            name = "Duplicate an index",
-            description = "The /duplicate route clones an index",
-            external_docs(url = "https://www.meilisearch.com/docs/reference/api/duplicate"),
-        ),
-    ),
-)]
-pub struct DuplicateApi;
-
-pub fn configure(cfg: &mut web::ServiceConfig) {
-    cfg.service(web::resource("").route(web::post().to(SeqHandler(duplicate))));
-}
-
-/// Duplicate an index
-#[utoipa::path(
-    post,
-    path = "{indexUid}/duplicate",
-    tag = "Duplicate an index",
-    security(("Bearer" = ["settings", "documents", "*"])),
-    params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
-    request_body = DuplicateQuery,
-    responses(
-        (status = 200, description = "The documents are returned", body = SimilarResult, content_type = "application/json", example = json!(
-            {
-                "hits": [
-                    {
-                        "id": 2770,
-                        "title": "American Pie 2",
-                        "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg",
-                        "overview": "The whole gang are back and as close as ever. They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…",
-                        "release_date": 997405200
-                    },
-                    {
-                        "id": 190859,
-                        "title": "American Sniper",
-                        "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg",
-                        "overview": "U.S. Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. His pinpoint accuracy not only saves countless lives but also makes him a prime…",
-                        "release_date": 1418256000
-                    }
-                ],
-                "offset": 0,
-                "limit": 2,
-                "estimatedTotalHits": 976,
-                "processingTimeMs": 35,
-                "query": "american "
-            }
-        )),
-        (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!(
-            {
-                "message": "Index `movies` not found.",
-                "code": "index_not_found",
-                "type": "invalid_request",
-                "link": "https://docs.meilisearch.com/errors#index_not_found"
-            }
-        )),
-        (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
-            {
-                "message": "The Authorization header is missing. It must use the bearer authorization method.",
-                "code": "missing_authorization_header",
-                "type": "auth",
-                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
-            }
-        )),
-    )
-)]
-pub async fn similar_post(
-    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
-    index_uid: web::Path<String>,
-    params: AwebJson<DuplicateQuery, DeserrJsonError>,
-    req: HttpRequest,
-    analytics: web::Data<Analytics>,
-) -> Result<HttpResponse, ResponseError> {
-    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-
-    let query = params.into_inner();
-    debug!(parameters = ?query, "Similar post");
-
-    let mut aggregate = SimilarAggregator::<SimilarPOST>::from_query(&query);
-
-    let similar = similar(index_scheduler, index_uid, query).await;
-
-    if let Ok(similar) = &similar {
-        aggregate.succeed(similar);
-    }
-    analytics.publish(aggregate, &req);
-
-    let similar = similar?;
-
-    debug!(returns = ?similar, "Similar post");
-    Ok(HttpResponse::Ok().json(similar))
-}
-
-async fn similar(
-    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
-    index_uid: IndexUid,
-    mut query: SimilarQuery,
-) -> Result<SimilarResult, ResponseError> {
-    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
-
-    // Tenant token search_rules.
-    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
-        add_search_rules(&mut query.filter, search_rules);
-    }
-
-    let index = index_scheduler.index(&index_uid)?;
-
-    let (embedder_name, embedder, quantized) = SearchKind::embedder(
-        &index_scheduler,
-        index_uid.to_string(),
-        &index,
-        &query.embedder,
-        None,
-        Route::Similar,
-    )?;
-
-    tokio::task::spawn_blocking(move || {
-        perform_similar(
-            &index,
-            query,
-            embedder_name,
-            embedder,
-            quantized,
-            retrieve_vectors,
-            index_scheduler.features(),
-        )
-    })
-    .await?
-}
-
-#[derive(Debug, deserr::Deserr, IntoParams)]
-#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
-#[into_params(parameter_in = Query)]
-pub struct SimilarQueryGet {
-    #[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
-    #[param(value_type = String)]
-    id: Param<String>,
-    #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
-    #[param(value_type = usize, default = DEFAULT_SEARCH_OFFSET)]
-    offset: Param<usize>,
-    #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
-    #[param(value_type = usize, default = DEFAULT_SEARCH_LIMIT)]
-    limit: Param<usize>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
-    #[param(value_type = Vec<String>)]
-    attributes_to_retrieve: Option<CS<String>>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
-    #[param(value_type = bool, default)]
-    retrieve_vectors: Param<bool>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
-    filter: Option<String>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
-    #[param(value_type = bool, default)]
-    show_ranking_score: Param<bool>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
-    #[param(value_type = bool, default)]
-    show_ranking_score_details: Param<bool>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
-    #[param(value_type = Option<f32>)]
-    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
-    #[deserr(error = DeserrQueryParamError<InvalidSimilarEmbedder>)]
-    pub embedder: String,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
-#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
-pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
-
-impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
-    type Error = InvalidSimilarRankingScoreThreshold;
-
-    fn try_from(s: String) -> Result<Self, Self::Error> {
-        let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
-        Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
-    }
-}
-
-impl From<SimilarQueryGet> for SimilarQuery {
-    fn from(
-        SimilarQueryGet {
-            id,
-            offset,
-            limit,
-            attributes_to_retrieve,
-            retrieve_vectors,
-            filter,
-            show_ranking_score,
-            show_ranking_score_details,
-            embedder,
-            ranking_score_threshold,
-        }: SimilarQueryGet,
-    ) -> Self {
-        let filter = match filter {
-            Some(f) => match serde_json::from_str(&f) {
-                Ok(v) => Some(v),
-                _ => Some(Value::String(f)),
-            },
-            None => None,
-        };
-
-        SimilarQuery {
-            id: serde_json::Value::String(id.0),
-            offset: offset.0,
-            limit: limit.0,
-            filter,
-            embedder,
-            attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
-            retrieve_vectors: retrieve_vectors.0,
-            show_ranking_score: show_ranking_score.0,
-            show_ranking_score_details: show_ranking_score_details.0,
-            ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
-        }
-    }
-}
@@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
     pub q: Option<String>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
     pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
+    pub media: Option<Value>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
     pub hybrid: Option<HybridQuery>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]

@@ -94,6 +96,7 @@ impl FacetSearchAggregator {
             facet_name,
             vector,
             q,
+            media,
             filter,
             matching_strategy,
             attributes_to_search_on,

@@ -108,6 +111,7 @@ impl FacetSearchAggregator {
             facet_names: Some(facet_name.clone()).into_iter().collect(),
             additional_search_parameters_provided: q.is_some()
                 || vector.is_some()
+                || media.is_some()
                 || filter.is_some()
                 || *matching_strategy != MatchingStrategy::default()
                 || attributes_to_search_on.is_some()

@@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             facet_name: _,
             q,
             vector,
+            media,
             filter,
             matching_strategy,
             attributes_to_search_on,

@@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {

         SearchQuery {
             q,
+            media,
             offset: DEFAULT_SEARCH_OFFSET(),
             limit: DEFAULT_SEARCH_LIMIT(),
             page,
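Assuming `media` behaves on facet search as it does on the main search route once forwarded into `SearchQuery`, a request body might look like the sketch below. The field names besides `media` follow the struct above; the shape of the `media` value itself depends on the configured embedder fragments and is illustrative only.

```rust
use serde_json::json;

fn main() {
    // Hypothetical `POST /indexes/movies/facet-search` body with a `media`
    // value; the inner `media` shape is an assumption for illustration.
    let body = json!({
        "facetName": "genres",
        "facetQuery": "act",
        "media": { "image": { "url": "https://example.com/poster.jpg" } },
        "hybrid": { "embedder": "multimodal", "semanticRatio": 0.5 }
    });
    println!("{body:#}");
}
```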
@@ -29,7 +29,6 @@ use crate::routes::is_dry_run;
 use crate::Opt;

 pub mod documents;
-pub mod duplicate;
 pub mod facet_search;
 pub mod search;
 mod search_analytics;

@@ -78,8 +77,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             .service(web::scope("/search").configure(search::configure))
             .service(web::scope("/facet-search").configure(facet_search::configure))
             .service(web::scope("/similar").configure(similar::configure))
-            .service(web::scope("/settings").configure(settings::configure))
-            .service(web::scope("/duplicate").configure(duplicate::configure)),
+            .service(web::scope("/settings").configure(settings::configure)),
     );
 }

@@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {

         Ok(Self {
             q: other.q,
+            // `media` not supported for `GET`
+            media: None,
             vector: other.vector.map(CS::into_inner),
             offset: other.offset.0,
             limit: other.limit.0,

@@ -481,28 +483,30 @@ pub fn search_kind(
     index_uid: String,
     index: &milli::Index,
 ) -> Result<SearchKind, ResponseError> {
+    let is_placeholder_query =
+        if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
+    let non_placeholder_query = !is_placeholder_query;
+    let is_media = query.media.is_some();
     // handle with care, the order of cases matters, the semantics is subtle
-    match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
-        // empty query, no vector => placeholder search
-        (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
-        // no query, no vector => placeholder search
-        (None, _, None) => Ok(SearchKind::KeywordOnly),
-        // hybrid.semantic_ratio == 1.0 => vector
-        (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
-            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
-        }
-        // hybrid.semantic_ratio == 0.0 => keyword
-        (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
+    match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
+        // media + vector => error
+        (true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
+        // media + !hybrid => error
+        (true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // vector + !hybrid => error
+        (_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // hybrid S0 => keyword
+        (_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
             Ok(SearchKind::KeywordOnly)
         }
-        // no query, hybrid, vector => semantic
-        (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
-            SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
+        // !q + !vector => placeholder search
+        (false, false, _, None) => Ok(SearchKind::KeywordOnly),
+        // hybrid S100 => semantic
+        (_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
+            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
         }
-        // query, no hybrid, no vector => keyword
-        (Some(_), None, None) => Ok(SearchKind::KeywordOnly),
-        // query, hybrid, maybe vector => hybrid
-        (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
+        // q + hybrid => hybrid
+        (_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
             index_scheduler,
             index_uid,
             index,

@@ -510,7 +514,11 @@ pub fn search_kind(
             **semantic_ratio,
             v.map(|v| v.len()),
         ),
-        (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
+        // !q + hybrid => semantic
+        (_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
+            SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
+        }
+        // q => keyword
+        (false, true, None, None) => Ok(SearchKind::KeywordOnly),
     }
 }
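The rewritten `search_kind` matches on a four-way tuple with the error cases listed first; the order of arms carries the semantics. A self-contained sketch of the same decision table, with simplified stand-in types instead of the route's real ones:

```rust
// Simplified stand-ins for the real SearchKind/error types, only to show the
// case ordering of the dispatch above.
#[derive(Debug, PartialEq)]
enum Kind {
    Keyword,
    Semantic,
    Hybrid,
}

#[derive(Debug, PartialEq)]
enum KindError {
    MediaAndVector,
    MissingHybrid,
}

// (has media, non-empty q, hybrid semantic ratio, has vector)
fn dispatch(
    is_media: bool,
    non_placeholder_query: bool,
    hybrid_ratio: Option<f32>,
    has_vector: bool,
) -> Result<Kind, KindError> {
    match (is_media, non_placeholder_query, hybrid_ratio, has_vector) {
        (true, _, _, true) => Err(KindError::MediaAndVector),
        (true, _, None, _) => Err(KindError::MissingHybrid),
        (_, _, None, true) => Err(KindError::MissingHybrid),
        (_, _, Some(r), _) if r == 0.0 => Ok(Kind::Keyword), // hybrid S0
        (false, false, _, false) => Ok(Kind::Keyword),       // placeholder search
        (_, _, Some(r), _) if r == 1.0 => Ok(Kind::Semantic), // hybrid S100
        (_, true, Some(_), _) => Ok(Kind::Hybrid),           // q + hybrid
        (_, false, Some(_), _) => Ok(Kind::Semantic),        // !q + hybrid
        (false, true, None, false) => Ok(Kind::Keyword),     // q alone
    }
}

fn main() {
    assert_eq!(dispatch(true, false, Some(0.5), true), Err(KindError::MediaAndVector));
    assert_eq!(dispatch(false, false, Some(0.5), false), Ok(Kind::Keyword));
    assert_eq!(dispatch(false, true, Some(0.5), false), Ok(Kind::Hybrid));
}
```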
@@ -61,6 +61,8 @@ pub struct SearchAggregator<Method: AggregateMethod> {
     semantic_ratio: bool,
     hybrid: bool,
     retrieve_vectors: bool,
+    // Number of requests containing `media`
+    total_media: usize,

     // every time a search is done, we increment the counter linked to the used settings
     matching_strategy: HashMap<String, usize>,

@@ -101,6 +103,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
         let SearchQuery {
             q,
             vector,
+            media,
             offset,
             limit,
             page,

@@ -175,6 +178,11 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
         if let Some(ref vector) = vector {
             ret.max_vector_size = vector.len();
         }
+
+        if media.is_some() {
+            ret.total_media = 1;
+        }
+
         ret.retrieve_vectors |= retrieve_vectors;

         if query.is_finite_pagination() {

@@ -277,6 +285,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
             show_ranking_score_details,
             semantic_ratio,
             hybrid,
+            total_media,
             total_degraded,
             total_used_negative_operator,
             ranking_score_threshold,

@@ -327,6 +336,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
         self.retrieve_vectors |= retrieve_vectors;
         self.semantic_ratio |= semantic_ratio;
         self.hybrid |= hybrid;
+        self.total_media += total_media;

         // pagination
         self.max_limit = self.max_limit.max(max_limit);

@@ -403,6 +413,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
             show_ranking_score_details,
             semantic_ratio,
             hybrid,
+            total_media,
             total_degraded,
             total_used_negative_operator,
             ranking_score_threshold,

@@ -450,6 +461,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
             "hybrid": {
                 "enabled": hybrid,
                 "semantic_ratio": semantic_ratio,
+                "total_media": total_media,
             },
             "pagination": {
                 "max_limit": max_limit,
@@ -755,6 +755,14 @@ fn validate_settings(
         if matches!(embedder.indexing_embedder, Setting::Set(_)) {
             features.check_composite_embedders("setting `indexingEmbedder`")?;
         }
+
+        if matches!(embedder.indexing_fragments, Setting::Set(_)) {
+            features.check_multimodal("setting `indexingFragments`")?;
+        }
+
+        if matches!(embedder.search_fragments, Setting::Set(_)) {
+            features.check_multimodal("setting `searchFragments`")?;
+        }
     }
 }

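The new checks follow the same gating pattern as the existing `check_composite_embedders`: a fragment setting is refused unless the corresponding experimental feature is enabled. A minimal sketch of that pattern with invented types (the real check lives on the index scheduler's feature handle, not on a struct like this):

```rust
// Illustrative feature gate in the spirit of `check_multimodal` above.
struct Features {
    multimodal: bool,
}

impl Features {
    fn check_multimodal(&self, context: &str) -> Result<(), String> {
        if self.multimodal {
            Ok(())
        } else {
            Err(format!(
                "{context} requires the `multimodal` experimental feature to be enabled"
            ))
        }
    }
}

fn main() {
    let features = Features { multimodal: false };
    // Setting `indexingFragments` while the flag is off is refused:
    assert!(features.check_multimodal("setting `indexingFragments`").is_err());
}
```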
Some files were not shown because too many files have changed in this diff.