Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-27 00:00:29 +00:00)

Compare commits: fix-fid-pr ... staging (67 commits)
| SHA1 |
|---|
| ab474614b7 |
| 75c77a1e92 |
| 17ba2429ea |
| 19081af126 |
| 0cd4638035 |
| cf31a65a88 |
| 0f7d71041f |
| 2500e3c067 |
| 2a46624e19 |
| 009c36a4d0 |
| 2a47e25e6d |
| 82912e191b |
| e2d372823a |
| 1876132172 |
| d0b0b90d17 |
| b08544e86d |
| d9111fe8ce |
| 41d8161017 |
| 7df5715d39 |
| 5fe02ab5e0 |
| 5ef7767429 |
| 3fad48167b |
| a92a48b9b9 |
| d53225bf64 |
| 20896500c2 |
| 1af520077c |
| 7e07cb9de1 |
| a12b06d99d |
| 331dc3d241 |
| ef9d9f8481 |
| d3d22d8ed4 |
| 5e6abcf50c |
| a4aaf932ba |
| 16c962eb30 |
| 55ca2c4481 |
| fedb444e66 |
| bef5954741 |
| ff8cf38d6b |
| f8ac575ec5 |
| 566b4efb06 |
| 1d499ed9b2 |
| 3bc62f0549 |
| 21bbbdec76 |
| 78ebd8dba2 |
| 34df44a002 |
| 48a27f669e |
| e2d0ce52ba |
| 995f8962bd |
| 1cd00f37c0 |
| 1aa3375e12 |
| 60ff1b19a8 |
| 7df5e3f059 |
| 0197dc87e0 |
| 7a172b82ca |
| eb3ff325d1 |
| a2a86ef4e2 |
| d0dda78f3d |
| fa8afc5cfd |
| aa32b719c7 |
| 41d2b1e52b |
| 54ee81bb09 |
| 9d9e0d4c54 |
| 19c9caed39 |
| 21c3b3957e |
| f292fc9ac0 |
| 1d3c4642a6 |
| bea28968a0 |
.github/ISSUE_TEMPLATE/sprint_issue.md (vendored, 4 lines changed)
@@ -22,6 +22,10 @@ Related product discussion:

<!---If necessary, create a list with technical/product steps-->

### Are you modifying a database?
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.

### Reminders when modifying the API

- [ ] Update the openAPI file with utoipa:
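The checklist above asks contributors to keep the OpenAPI document in sync via utoipa. As a rough, hypothetical illustration of that annotation style (the route, path, and types here are invented for the example, not taken from this diff):

```rust
use utoipa::{OpenApi, ToSchema};

/// Hypothetical response type; real Meilisearch routes define their own.
#[derive(serde::Serialize, ToSchema)]
struct HealthResponse {
    status: String,
}

/// Annotating the handler is what keeps the generated OpenAPI file in sync with the code.
#[utoipa::path(
    get,
    path = "/health",
    responses((status = 200, description = "Instance is healthy", body = HealthResponse))
)]
async fn health() -> HealthResponse {
    HealthResponse { status: "available".to_string() }
}

/// The top-level document lists every annotated path and schema.
#[derive(OpenApi)]
#[openapi(paths(health), components(schemas(HealthResponse)))]
struct ApiDoc;
```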
.github/workflows/db-change-comments.yml (vendored, new file, 57 lines)
@@ -0,0 +1,57 @@
name: Comment when db change labels are added

on:
  pull_request:
    types: [labeled]

env:
  MESSAGE: |
    ### Hello, I'm a bot 🤖

    You are receiving this message because you declared that this PR make changes to the Meilisearch database.
    Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.

    Thank you for contributing to Meilisearch :heart:

    ## This PR makes forward-compatible changes

    *Forward-compatible changes are changes to the database such that databases created in an older version of Meilisearch are still valid in the new version of Meilisearch. They usually represent additive changes, like adding a new optional attribute or setting.*

    - [ ] Detail the change to the DB format and why they are forward compatible
    - [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.


    ## This PR makes breaking changes

    *Breaking changes are changes to the database such that databases created in an older version of Meilisearch need changes to remain valid in the new version of Meilisearch. This typically happens when the way to store the data changed (change of database, new required key, etc). This can also happen due to breaking changes in the API of an experimental feature. ⚠️ This kind of changes are more difficult to achieve safely, so proceed with caution and test dumpless upgrade right before merging the PR.*

    - [ ] Detail the changes to the DB format,
      - [ ] which are compatible, and why
      - [ ] which are not compatible, why, and how they will be fixed up in the upgrade
    - [ ] /!\ Ensure all the read operations still work!
      - If the change happened in milli, you may need to check the version of the database before doing any read operation
      - If the change happened in the index-scheduler, make sure the new code can immediately read the old database
      - If the change happened in the meilisearch-auth database, reach out to the team; we don't know yet how to handle these changes
    - [ ] Write the code to go from the old database to the new one
      - If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
      - If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
    - [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected

jobs:
  add-comment:
    runs-on: ubuntu-latest
    if: github.event.label.name == 'db change'
    steps:
      - name: Add comment
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const message = process.env.MESSAGE;
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: message
            })
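The bot message above points contributors at the milli upgrade chain when a PR makes breaking DB changes. As a purely illustrative sketch of what one upgrade step in such a chain tends to look like (the trait name, transaction type, and version numbers below are assumptions for the example, not the actual code at the linked locations):

```rust
/// Stand-in for an LMDB write transaction type.
struct WriteTxn;

/// Hypothetical shape of a dumpless-upgrade step; the real trait lives in
/// crates/milli/src/update/upgrade/mod.rs and may differ.
trait UpgradeStep {
    /// Rewrite whatever data changed format, using the caller's write transaction.
    fn upgrade(&self, wtxn: &mut WriteTxn, from: (u32, u32, u32)) -> anyhow::Result<()>;
    /// The database version this step produces.
    fn target_version(&self) -> (u32, u32, u32);
}

#[allow(non_camel_case_types)]
struct V1_13_To_V1_14;

impl UpgradeStep for V1_13_To_V1_14 {
    fn upgrade(&self, _wtxn: &mut WriteTxn, _from: (u32, u32, u32)) -> anyhow::Result<()> {
        // e.g. re-encode a key, create a new database, or backfill a new field.
        Ok(())
    }

    fn target_version(&self) -> (u32, u32, u32) {
        (1, 14, 0)
    }
}
```

Steps like this are chained so that a database several versions behind can be walked forward one format change at a time, which is what the integration test mentioned in the checklist exercises.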
.github/workflows/db-change-missing.yml (vendored, new file, 28 lines)
@@ -0,0 +1,28 @@
name: Check db change labels

on:
  pull_request:
    types: [opened, synchronize, reopened, labeled, unlabeled]

env:
  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}

jobs:
  check-labels:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Check db change labels
        id: check_labels
        run: |
          URL=/repos/meilisearch/meilisearch/pulls/${{ github.event.pull_request.number }}/labels
          echo ${{ github.event.pull_request.number }}
          echo $URL
          LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/meilisearch/meilisearch/issues/${{ github.event.pull_request.number }}/labels -q .[].name)
          if [[ ! "$LABELS" =~ "db change" && ! "$LABELS" =~ "no db change" ]]; then
            echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
            exit 1
          else
            echo "The label is set"
          fi
Cargo.lock (generated, 148 lines changed)
@@ -47,7 +47,7 @@ dependencies = [
 "actix-utils",
 "ahash 0.8.11",
 "base64 0.22.1",
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "brotli",
 "bytes",
 "bytestring",
@@ -393,41 +393,24 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"

[[package]]
name = "arroy"
version = "0.5.0"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6"
dependencies = [
 "bytemuck",
 "byteorder",
 "enum-iterator",
 "heed",
 "log",
 "memmap2",
 "nohash",
 "ordered-float",
 "page_size",
 "rand",
 "rayon",
 "roaring",
 "tempfile",
 "thiserror 1.0.69",
]

[[package]]
name = "arroy"
version = "0.5.0"
source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
dependencies = [
 "bytemuck",
 "byteorder",
 "heed",
 "log",
 "memmap2",
 "nohash",
 "ordered-float",
 "rand",
 "rayon",
 "roaring",
 "tempfile",
 "thiserror 1.0.69",
 "thiserror 2.0.9",
 "tracing",
]

[[package]]
@@ -503,7 +486,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2

[[package]]
name = "benchmarks"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "bumpalo",
@@ -553,7 +536,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "cexpr",
 "clang-sys",
 "itertools 0.13.0",
@@ -599,9 +582,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "bitflags"
version = "2.6.0"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
dependencies = [
 "serde",
]
@@ -694,7 +677,7 @@ dependencies = [

[[package]]
name = "build-info"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "time",
@@ -944,13 +927,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]]
name = "cc"
version = "1.0.104"
version = "1.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490"
checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
dependencies = [
 "jobserver",
 "libc",
 "once_cell",
 "shlex",
]

[[package]]
@@ -1671,7 +1654,7 @@ dependencies = [

[[package]]
name = "dump"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "big_s",
@@ -1873,7 +1856,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

[[package]]
name = "file-store"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "tempfile",
 "thiserror 2.0.9",
@@ -1895,7 +1878,7 @@ dependencies = [

[[package]]
name = "filter-parser"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "insta",
 "nom",
@@ -1915,7 +1898,7 @@ dependencies = [

[[package]]
name = "flatten-serde-json"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "criterion",
 "serde_json",
@@ -2054,7 +2037,7 @@ dependencies = [

[[package]]
name = "fuzzers"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "arbitrary",
 "bumpalo",
@@ -2082,7 +2065,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "debugid",
 "fxhash",
 "serde",
@@ -2249,7 +2232,7 @@ version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "libc",
 "libgit2-sys",
 "log",
@@ -2397,11 +2380,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"

[[package]]
name = "heed"
version = "0.20.5"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb"
checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "byteorder",
 "heed-traits",
 "heed-types",
@@ -2421,9 +2404,9 @@ checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"

[[package]]
name = "heed-types"
version = "0.20.1"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d3f528b053a6d700b2734eabcd0fd49cb8230647aa72958467527b0b7917114"
checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d"
dependencies = [
 "bincode",
 "byteorder",
@@ -2743,10 +2726,9 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

[[package]]
name = "index-scheduler"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "big_s",
 "bincode",
 "bumpalo",
@@ -2950,7 +2932,7 @@ dependencies = [

[[package]]
name = "json-depth-checker"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "criterion",
 "serde_json",
@@ -3013,9 +2995,9 @@ dependencies = [

[[package]]
name = "libc"
version = "0.2.169"
version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"

[[package]]
name = "libgit2-sys"
@@ -3468,9 +3450,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"

[[package]]
name = "lmdb-master-sys"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9"
checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
dependencies = [
 "cc",
 "doxygen-rs",
@@ -3513,9 +3495,18 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"

[[package]]
name = "log"
version = "0.4.21"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"

[[package]]
name = "lru"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465"
dependencies = [
 "hashbrown 0.15.2",
]

[[package]]
name = "lzma-rs"
@@ -3569,7 +3560,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

[[package]]
name = "meili-snap"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "insta",
 "md5",
@@ -3578,7 +3569,7 @@ dependencies = [

[[package]]
name = "meilisearch"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "actix-cors",
 "actix-http",
@@ -3670,7 +3661,7 @@ dependencies = [

[[package]]
name = "meilisearch-auth"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "base64 0.22.1",
 "enum-iterator",
@@ -3689,7 +3680,7 @@ dependencies = [

[[package]]
name = "meilisearch-types"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "actix-web",
 "anyhow",
@@ -3723,10 +3714,9 @@ dependencies = [

[[package]]
name = "meilitool"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
 "clap",
 "dump",
 "file-store",
@@ -3758,10 +3748,10 @@ dependencies = [

[[package]]
name = "milli"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "allocator-api2",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "arroy",
 "bbqueue",
 "big_s",
 "bimap",
@@ -3798,6 +3788,7 @@ dependencies = [
 "json-depth-checker",
 "levenshtein_automata",
 "liquid",
 "lru",
 "maplit",
 "md5",
 "meili-snap",
@@ -4129,9 +4120,9 @@ checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9"

[[package]]
name = "once_cell"
version = "1.20.2"
version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad"

[[package]]
name = "onig"
@@ -4270,7 +4261,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

[[package]]
name = "permissive-json-pointer"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "big_s",
 "serde_json",
@@ -4518,7 +4509,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "hex",
 "lazy_static",
 "procfs-core",
@@ -4531,7 +4522,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "hex",
]

@@ -4872,7 +4863,7 @@ version = "1.20.0"
source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
dependencies = [
 "ahash 0.8.11",
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "instant",
 "num-traits",
 "once_cell",
@@ -4895,15 +4886,14 @@ dependencies = [

[[package]]
name = "ring"
version = "0.17.8"
version = "0.17.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
dependencies = [
 "cc",
 "cfg-if",
 "getrandom",
 "libc",
 "spin",
 "untrusted",
 "windows-sys 0.52.0",
]
@@ -5009,7 +4999,7 @@ version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "errno",
 "libc",
 "linux-raw-sys",
@@ -5131,9 +5121,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"

[[package]]
name = "serde"
version = "1.0.217"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
 "serde_derive",
]
@@ -5149,9 +5139,9 @@ dependencies = [

[[package]]
name = "serde_derive"
version = "1.0.217"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
 "proc-macro2",
 "quote",
@@ -5160,9 +5150,9 @@ dependencies = [

[[package]]
name = "serde_json"
version = "1.0.138"
version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [
 "indexmap",
 "itoa",
@@ -5530,7 +5520,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [
 "bitflags 2.6.0",
 "bitflags 2.9.0",
 "byteorder",
 "enum-as-inner",
 "libc",
@@ -6847,7 +6837,7 @@ dependencies = [

[[package]]
name = "xtask"
version = "1.13.3"
version = "1.14.0"
dependencies = [
 "anyhow",
 "build-info",
@@ -22,7 +22,7 @@ members = [
]

[workspace.package]
version = "1.13.3"
version = "1.14.0"
authors = [
  "Quentin de Quelen <quentin@dequelen.me>",
  "Clément Renault <clement@meilisearch.com>",
@@ -36,6 +36,12 @@ license = "MIT"
[profile.release]
codegen-units = 1

# We now compile heed without the NDEBUG define for better performance.
# However, we still enable debug assertions for a better detection of
# disk corruption on the cloud or in OSS.
[profile.release.package.heed]
debug-assertions = true

[profile.dev.package.flate2]
opt-level = 3
@@ -23,6 +23,12 @@
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
</p>

<p align="center" name="ph-banner">
  <a href="https://www.producthunt.com/products/meilisearch-cloud">
    <img src="assets/ph-banner.png" alt="Meilisearch AI-powered search general availability announcement on ProductHunt">
  </a>
</p>

<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

assets/ph-banner.png (new binary file, not shown; 578 KiB)
@@ -5,6 +5,7 @@ status = [
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',
'Check db change labels',
]
# 3 hours timeout
timeout-sec = 10800
@@ -35,7 +35,8 @@ fn setup_dir(path: impl AsRef<Path>) {
fn setup_index() -> Index {
    let path = "benches.mmdb";
    setup_dir(path);
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
    options.max_readers(100);
    Index::new(options, path, true).unwrap()
@@ -65,7 +65,8 @@ pub fn base_setup(conf: &Conf) -> Index {
    }
    create_dir_all(conf.database_name).unwrap();

    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
    options.max_readers(100);
    let index = Index::new(options, conf.database_name, true).unwrap();
@@ -57,7 +57,8 @@ fn main() {
        let opt = opt.clone();

        let handle = std::thread::spawn(move || {
            let mut options = EnvOpenOptions::new();
            let options = EnvOpenOptions::new();
            let mut options = options.read_txn_without_tls();
            options.map_size(1024 * 1024 * 1024 * 1024);
            let tempdir = match opt.path {
                Some(path) => TempDir::new_in(path).unwrap(),
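The three hunks above all apply the same migration that runs through this whole compare view: with the heed 0.20 → 0.22 bump in Cargo.lock, `EnvOpenOptions::new()` is first switched into its `WithoutTls` mode via `read_txn_without_tls()` before the environment is opened. A minimal sketch of the new opening pattern, assuming heed 0.22 and placeholder sizes (not code from this diff):

```rust
use std::path::Path;

use heed::{Env, EnvOpenOptions, WithoutTls};

/// Open an LMDB environment whose read transactions are not tied to a thread's
/// TLS slot, mirroring the pattern introduced across this compare view.
fn open_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
    let options = EnvOpenOptions::new();
    // Switch the builder to `WithoutTls` before setting the usual options.
    let mut options = options.read_txn_without_tls();
    options.map_size(1024 * 1024 * 1024); // 1 GiB, arbitrary for the sketch
    options.max_dbs(2);
    // Safety: opening an LMDB env is unsafe in heed because the caller must
    // guarantee the same path is not opened twice in the same process.
    unsafe { options.open(path) }
}
```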
@@ -44,7 +44,6 @@ ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }

[dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40
@@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock};

use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};

use crate::error::FeatureNotEnabledError;
use crate::Result;
@@ -118,6 +118,19 @@ impl RoFeatures {
            .into())
        }
    }

    pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
        if self.runtime.composite_embedders {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action,
                feature: "composite embedders",
                issue_link: "https://github.com/orgs/meilisearch/discussions/816",
            }
            .into())
        }
    }
}

impl FeatureData {
@@ -126,7 +139,7 @@ impl FeatureData {
    }

    pub fn new(
        env: &Env,
        env: &Env<WithoutTls>,
        wtxn: &mut RwTxn,
        instance_features: InstanceTogglableFeatures,
    ) -> Result<Self> {
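The new `check_composite_embedders` guard added above follows the same shape as the other experimental-feature checks: a caller bails out with a `FeatureNotEnabledError` before touching the gated functionality. A hypothetical call site, purely illustrative (the function name and surrounding types are assumptions, not from this diff; `RoFeatures` and `Result` are the index-scheduler's own types):

```rust
// Hypothetical validation step: refuse composite embedder settings unless the
// experimental feature has been enabled on this instance.
fn validate_embedder_settings(features: &RoFeatures, uses_composite: bool) -> Result<()> {
    if uses_composite {
        features.check_composite_embedders("setting an embedder to `composite`")?;
    }
    Ok(())
}
```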
@@ -304,7 +304,8 @@ fn create_or_open_index(
    map_size: usize,
    creation: bool,
) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(clamp_to_page_size(map_size));

    // You can find more details about this experimental
@@ -333,7 +334,7 @@ fn create_or_open_index(
#[cfg(test)]
mod tests {

    use meilisearch_types::heed::Env;
    use meilisearch_types::heed::{Env, WithoutTls};
    use meilisearch_types::Index;
    use uuid::Uuid;

@@ -343,7 +344,7 @@ mod tests {
    use crate::IndexScheduler;

    impl IndexMapper {
        fn test() -> (Self, Env, IndexSchedulerHandle) {
        fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
            let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
            (index_scheduler.index_mapper, index_scheduler.env, handle)
        }
@@ -4,7 +4,7 @@ use std::time::Duration;
use std::{fs, thread};

use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig;
@@ -164,7 +164,7 @@ impl IndexMapper {
    }

    pub fn new(
        env: &Env,
        env: &Env<WithoutTls>,
        wtxn: &mut RwTxn,
        options: &IndexSchedulerOptions,
        budget: IndexBudget,
@@ -54,7 +54,7 @@ use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn};
use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@@ -125,13 +125,17 @@ pub struct IndexSchedulerOptions {
    pub instance_features: InstanceTogglableFeatures,
    /// The experimental features enabled for this instance.
    pub auto_upgrade: bool,
    /// The maximal number of entries in the search query cache of an embedder.
    ///
    /// 0 disables the cache.
    pub embedding_cache_cap: usize,
}

/// Structure which holds meilisearch's indexes and schedules the tasks
/// to be performed on them.
pub struct IndexScheduler {
    /// The LMDB environment which the DBs are associated with.
    pub(crate) env: Env,
    pub(crate) env: Env<WithoutTls>,

    /// The list of tasks currently processing
    pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
@@ -156,6 +160,11 @@ pub struct IndexScheduler {
    /// The Authorization header to send to the webhook URL.
    pub(crate) webhook_authorization_header: Option<String>,

    /// A map to retrieve the runtime representation of an embedder depending on its configuration.
    ///
    /// This map may return the same embedder object for two different indexes or embedder settings,
    /// but it will only do this if the embedder configuration options are the same, leading
    /// to the same embeddings for the same input text.
    embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,

    // ================= test
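The `embedders` map documented above deduplicates embedder instances by their configuration, so two indexes with identical embedder settings share one `Embedder` (and therefore one search-query cache of at most `embedding_cache_cap` entries). A rough sketch of the get-or-create pattern such a map implies, with simplified stand-in types and lock handling (not the actual method from this PR):

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

/// Simplified stand-ins for the real milli types.
#[derive(Clone, PartialEq, Eq, Hash)]
struct EmbedderOptions(String);
struct Embedder;

/// Return a shared embedder for this configuration, building it at most once per config.
fn embedder_for(
    map: &Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
    options: &EmbedderOptions,
) -> Arc<Embedder> {
    // Fast path: this configuration was already instantiated by another index.
    if let Some(embedder) = map.read().unwrap().get(options) {
        return embedder.clone();
    }
    // Slow path: build it and publish it for the other indexes.
    let embedder = Arc::new(Embedder);
    map.write().unwrap().insert(options.clone(), embedder.clone());
    embedder
}
```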
@@ -209,6 +218,7 @@ impl IndexScheduler {
    #[allow(private_interfaces)] // because test_utils is private
    pub fn new(
        options: IndexSchedulerOptions,
        auth_env: Env<WithoutTls>,
        from_db_version: (u32, u32, u32),
        #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
        #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
@@ -240,7 +250,9 @@ impl IndexScheduler {
        };

        let env = unsafe {
            heed::EnvOpenOptions::new()
            let env_options = heed::EnvOpenOptions::new();
            let mut env_options = env_options.read_txn_without_tls();
            env_options
                .max_dbs(Self::nb_db())
                .map_size(budget.task_db_size)
                .open(&options.tasks_path)
@@ -260,7 +272,7 @@ impl IndexScheduler {
            processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
            version,
            queue,
            scheduler: Scheduler::new(&options),
            scheduler: Scheduler::new(&options, auth_env),

            index_mapper,
            env,
@@ -358,7 +370,7 @@ impl IndexScheduler {
        }
    }

    pub fn read_txn(&self) -> Result<RoTxn> {
    pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
        self.env.read_txn().map_err(|e| e.into())
    }
@@ -427,12 +439,14 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
        self.index_mapper.index(&self.env.read_txn()?, name)
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index(&rtxn, name)
    }

    /// Return the boolean referring if index exists.
    pub fn index_exists(&self, name: &str) -> Result<bool> {
        self.index_mapper.index_exists(&self.env.read_txn()?, name)
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index_exists(&rtxn, name)
    }

    /// Return the name of all indexes without opening them.
@@ -507,7 +521,8 @@ impl IndexScheduler {
    /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
    /// 3. The number of times the properties appeared.
    pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
        self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap())
        let rtxn = self.read_txn()?;
        self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
    }

    // Return true if there is at least one task that is processing.
@@ -812,7 +827,7 @@ impl IndexScheduler {

            // add missing embedder
            let embedder = Arc::new(
                Embedder::new(embedder_options.clone())
                Embedder::new(embedder_options.clone(), self.scheduler.embedding_cache_cap)
                    .map_err(meilisearch_types::milli::vector::Error::from)
                    .map_err(|err| {
                        Error::from_milli(err.into(), Some(index_uid.clone()))
@@ -3,7 +3,7 @@ use std::ops::{Bound, RangeBounds};

use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap};
@@ -66,7 +66,7 @@ impl BatchQueue {
        NUMBER_OF_DATABASES
    }

    pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
    pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
        Ok(Self {
            all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
            status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
@@ -13,7 +13,7 @@ use std::time::Duration;

use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
@@ -157,7 +157,7 @@ impl Queue {

    /// Create an index scheduler and start its run loop.
    pub(crate) fn new(
        env: &Env,
        env: &Env<WithoutTls>,
        wtxn: &mut RwTxn,
        options: &IndexSchedulerOptions,
    ) -> Result<Self> {
@@ -1,7 +1,7 @@
use std::ops::{Bound, RangeBounds};

use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
@@ -68,7 +68,7 @@ impl TaskQueue {
        NUMBER_OF_DATABASES
    }

    pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
    pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
        Ok(Self {
            all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
            status: env.create_database(wtxn, Some(db_name::STATUS))?,
@@ -21,6 +21,7 @@ use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc;

use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::tasks::Status;
use rayon::current_num_threads;
@@ -71,10 +72,15 @@ pub struct Scheduler {
    pub(crate) snapshots_path: PathBuf,

    /// The path to the folder containing the auth LMDB env.
    pub(crate) auth_path: PathBuf,
    pub(crate) auth_env: Env<WithoutTls>,

    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

    /// The maximal number of entries in the search query cache of an embedder.
    ///
    /// 0 disables the cache.
    pub(crate) embedding_cache_cap: usize,
}

impl Scheduler {
@@ -87,12 +93,13 @@ impl Scheduler {
            batched_tasks_size_limit: self.batched_tasks_size_limit,
            dumps_path: self.dumps_path.clone(),
            snapshots_path: self.snapshots_path.clone(),
            auth_path: self.auth_path.clone(),
            auth_env: self.auth_env.clone(),
            version_file_path: self.version_file_path.clone(),
            embedding_cache_cap: self.embedding_cache_cap,
        }
    }

    pub fn new(options: &IndexSchedulerOptions) -> Scheduler {
    pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
        Scheduler {
            must_stop_processing: MustStopProcessing::default(),
            // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
@@ -102,8 +109,9 @@ impl Scheduler {
            batched_tasks_size_limit: options.batched_tasks_size_limit,
            dumps_path: options.dumps_path.clone(),
            snapshots_path: options.snapshots_path.clone(),
            auth_path: options.auth_path.clone(),
            auth_env,
            version_file_path: options.version_file_path.clone(),
            embedding_cache_cap: options.embedding_cache_cap,
        }
    }
}
@@ -4,7 +4,6 @@ use std::sync::atomic::Ordering;

use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME};

@@ -28,7 +27,7 @@ impl IndexScheduler {

        // 2. Snapshot the index-scheduler LMDB env
        //
        // When we call copy_to_file, LMDB opens a read transaction by itself,
        // When we call copy_to_path, LMDB opens a read transaction by itself,
        // we can't provide our own. It is an issue as we would like to know
        // the update files to copy but new ones can be enqueued between the copy
        // of the env and the new transaction we open to retrieve the enqueued tasks.
@@ -42,7 +41,7 @@ impl IndexScheduler {
        progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
        let dst = temp_snapshot_dir.path().join("tasks");
        fs::create_dir_all(&dst)?;
        self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
        self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;

        // 2.2 Create a read transaction on the index-scheduler
        let rtxn = self.env.read_txn()?;
@@ -81,7 +80,7 @@ impl IndexScheduler {
            let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
            fs::create_dir_all(&dst)?;
            index
                .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
                .copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
                .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
        }

@@ -91,14 +90,7 @@ impl IndexScheduler {
        progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
        let dst = temp_snapshot_dir.path().join("auth");
        fs::create_dir_all(&dst)?;
        // TODO We can't use the open_auth_store_env function here but we should
        let auth = unsafe {
            milli::heed::EnvOpenOptions::new()
                .map_size(1024 * 1024 * 1024) // 1 GiB
                .max_dbs(2)
                .open(&self.scheduler.auth_path)
        }?;
        auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
        self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;

        // 5. Copy and tarball the flat snapshot
        progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
@@ -104,10 +104,9 @@ fn import_vectors() {

        let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
        let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
        let beagle_embed =
            hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap();
        let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap();
        let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap();
        let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
        let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
        let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
        (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
    };
@@ -5,6 +5,7 @@ use std::time::Duration;
use big_s::S;
use crossbeam_channel::RecvTimeoutError;
use file_store::File;
use meilisearch_auth::open_auth_store_env;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig;
@@ -111,6 +112,7 @@ impl IndexScheduler {
            batched_tasks_size_limit: u64::MAX,
            instance_features: Default::default(),
            auto_upgrade: true, // Don't cost much and will ensure the happy path works
            embedding_cache_cap: 10,
        };
        let version = configuration(&mut options).unwrap_or_else(|| {
            (
@@ -120,7 +122,10 @@ impl IndexScheduler {
            )
        });

        let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap();
        std::fs::create_dir_all(&options.auth_path).unwrap();
        let auth_env = open_auth_store_env(&options.auth_path).unwrap();
        let index_scheduler =
            Self::new(options, auth_env, version, sender, planned_failures).unwrap();

        // To be 100% consistent between all test we're going to start the scheduler right now
        // and ensure it's in the expected starting state.
@@ -1,5 +1,5 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn};
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime;
@@ -9,13 +9,17 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning;

trait UpgradeIndexScheduler {
    fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32))
        -> anyhow::Result<()>;
    fn upgrade(
        &self,
        env: &Env<WithoutTls>,
        wtxn: &mut RwTxn,
        original: (u32, u32, u32),
    ) -> anyhow::Result<()>;
    fn target_version(&self) -> (u32, u32, u32);
}

pub fn upgrade_index_scheduler(
    env: &Env,
    env: &Env<WithoutTls>,
    versioning: &Versioning,
    from: (u32, u32, u32),
    to: (u32, u32, u32),
@@ -29,6 +33,7 @@ pub fn upgrade_index_scheduler(
    let start = match from {
        (1, 12, _) => 0,
        (1, 13, _) => 0,
        (1, 14, _) => 0,
        (major, minor, patch) => {
            if major > current_major
                || (major == current_major && minor > current_minor)
@@ -91,7 +96,7 @@ struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp {
    fn upgrade(
        &self,
        _env: &Env,
        _env: &Env<WithoutTls>,
        _wtxn: &mut RwTxn,
        _original: (u32, u32, u32),
    ) -> anyhow::Result<()> {
@@ -1,5 +1,5 @@
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning;

@@ -46,12 +46,12 @@ impl Versioning {
    }

    /// Return `Self` without checking anything about the version
    pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
    pub fn raw_new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
        let version = env.create_database(wtxn, Some(db_name::VERSION))?;
        Ok(Self { version })
    }

    pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> {
    pub(crate) fn new(env: &Env<WithoutTls>, db_version: (u32, u32, u32)) -> Result<Self> {
        let mut wtxn = env.write_txn()?;
        let this = Self::raw_new(env, &mut wtxn)?;
        let from = match this.get_version(&wtxn)? {
@@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{BufReader, Write};
use std::path::Path;

use meilisearch_types::heed::{Env, WithoutTls};
use serde_json::Deserializer;

use crate::{AuthController, HeedAuthStore, Result};
@@ -9,11 +10,8 @@ use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys";

impl AuthController {
    pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
        let mut store = HeedAuthStore::new(&src)?;

        // do not attempt to close the database on drop!
        store.set_drop_on_close(false);
    pub fn dump(auth_env: Env<WithoutTls>, dst: impl AsRef<Path>) -> Result<()> {
        let store = HeedAuthStore::new(auth_env)?;

        let keys_file_path = dst.as_ref().join(KEYS_PATH);

@@ -27,8 +25,8 @@ impl AuthController {
        Ok(())
    }

    pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
        let store = HeedAuthStore::new(&dst)?;
    pub fn load_dump(src: impl AsRef<Path>, auth_env: Env<WithoutTls>) -> Result<()> {
        let store = HeedAuthStore::new(auth_env)?;

        let keys_file_path = src.as_ref().join(KEYS_PATH);
@@ -3,11 +3,10 @@ pub mod error;
mod store;

use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;

use error::{AuthControllerError, Result};
use maplit::hashset;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting;
@@ -19,19 +18,19 @@ use uuid::Uuid;

#[derive(Clone)]
pub struct AuthController {
    store: Arc<HeedAuthStore>,
    store: HeedAuthStore,
    master_key: Option<String>,
}

impl AuthController {
    pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
        let store = HeedAuthStore::new(db_path)?;
    pub fn new(auth_env: Env<WithoutTls>, master_key: &Option<String>) -> Result<Self> {
        let store = HeedAuthStore::new(auth_env)?;

        if store.is_empty()? {
            generate_default_keys(&store)?;
        }

        Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
        Ok(Self { store, master_key: master_key.clone() })
    }

    /// Return `Ok(())` if the auth controller is able to access one of its database.
@@ -1,18 +1,16 @@
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::HashSet;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::result::Result as StdResult;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use hmac::{Hmac, Mac};
|
||||
use meilisearch_types::heed::BoxedError;
|
||||
use meilisearch_types::heed::{BoxedError, WithoutTls};
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::KeyId;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::heed;
|
||||
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
|
||||
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
|
||||
use sha2::Sha256;
|
||||
@@ -25,44 +23,32 @@ use super::error::{AuthControllerError, Result};
|
||||
use super::{Action, Key};
|
||||
|
||||
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
||||
const AUTH_DB_PATH: &str = "auth";
|
||||
const KEY_DB_NAME: &str = "api-keys";
|
||||
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedAuthStore {
|
||||
env: Arc<Env>,
|
||||
env: Env<WithoutTls>,
|
||||
keys: Database<Bytes, SerdeJson<Key>>,
|
||||
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
|
||||
should_close_on_drop: bool,
|
||||
}
|
||||
|
||||
impl Drop for HeedAuthStore {
|
||||
fn drop(&mut self) {
|
||||
if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 {
|
||||
self.env.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
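Illustrative sketch (not part of the diff): why a store built around a cheaply clonable environment handle no longer needs `Arc<HeedAuthStore>` or the `should_close_on_drop` bookkeeping removed above. The types below are stand-ins, not the real heed or Meilisearch ones.

use std::sync::Arc;

// Stand-in for an LMDB environment handle: cloning just bumps a reference count,
// and the underlying resource is released once the last clone is dropped.
#[derive(Clone)]
struct Env {
    inner: Arc<String>, // pretend this is the memory-mapped environment
}

// Because `Env` is Clone, the store itself can be Clone, and callers can hold
// `AuthStore` by value instead of `Arc<AuthStore>` with a custom `Drop` impl.
#[derive(Clone)]
struct AuthStore {
    env: Env,
}

impl AuthStore {
    fn new(env: Env) -> Self {
        Self { env }
    }
}

fn main() {
    let env = Env { inner: Arc::new("auth".to_string()) };
    let store = AuthStore::new(env.clone());
    let controller_copy = store.clone(); // shares the same environment
    assert_eq!(
        Arc::strong_count(&store.env.inner),
        Arc::strong_count(&controller_copy.env.inner)
    );
}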
||||
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
    let mut options = EnvOpenOptions::new();
pub fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(AUTH_STORE_SIZE); // 1GB
    options.max_dbs(2);
    unsafe { options.open(path) }
}

impl HeedAuthStore {
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref().join(AUTH_DB_PATH);
        create_dir_all(&path)?;
        let env = Arc::new(open_auth_store_env(path.as_ref())?);
    pub fn new(env: Env<WithoutTls>) -> Result<Self> {
        let mut wtxn = env.write_txn()?;
        let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
        let action_keyid_index_expiration =
            env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
        wtxn.commit()?;
        Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
        Ok(Self { env, keys, action_keyid_index_expiration })
    }

    /// Return `Ok(())` if the auth store is able to access one of its database.
|
||||
@@ -82,10 +68,6 @@ impl HeedAuthStore {
|
||||
Ok(self.env.non_free_pages_size()?)
|
||||
}
|
||||
|
||||
pub fn set_drop_on_close(&mut self, v: bool) {
|
||||
self.should_close_on_drop = v;
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> Result<bool> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
@@ -293,7 +275,7 @@ impl HeedAuthStore {
|
||||
/// optionally on a specific index, for a given key.
|
||||
pub struct KeyIdActionCodec;
|
||||
|
||||
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
type DItem = (KeyId, Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
|
||||
@@ -310,7 +292,7 @@ impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
|
||||
|
||||
@@ -241,6 +241,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentIds , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -281,6 +282,7 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -405,7 +407,7 @@ impl ErrorCode for milli::Error {
|
||||
match error {
|
||||
// TODO: wait for spec for new error codes.
|
||||
UserError::SerdeJson(_)
|
||||
| UserError::InvalidLmdbOpenOptions
|
||||
| UserError::EnvAlreadyOpened
|
||||
| UserError::DocumentLimitReached
|
||||
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
|
||||
UserError::InvalidStoreFile => Code::InvalidStoreFile,
|
||||
@@ -502,8 +504,7 @@ impl ErrorCode for HeedError {
|
||||
HeedError::Mdb(_)
|
||||
| HeedError::Encoding(_)
|
||||
| HeedError::Decoding(_)
|
||||
| HeedError::DatabaseClosing
|
||||
| HeedError::BadOpenOptions { .. } => Code::Internal,
|
||||
| HeedError::EnvAlreadyOpened => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub contains_filter: bool,
|
||||
pub network: bool,
|
||||
pub get_task_documents_route: bool,
|
||||
pub composite_embedders: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
|
||||
@@ -198,6 +198,8 @@ struct Infos {
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
experimental_composite_embedders: bool,
|
||||
experimental_embedding_cache_entries: usize,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@@ -245,6 +247,7 @@ impl Infos {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_limit_batched_tasks_total_size,
|
||||
experimental_embedding_cache_entries,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
@@ -290,6 +293,7 @@ impl Infos {
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
@@ -309,6 +313,8 @@ impl Infos {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
experimental_embedding_cache_entries,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
||||
@@ -34,7 +34,7 @@ use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use index_scheduler::versioning::Versioning;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_auth::{open_auth_store_env, AuthController};
|
||||
use meilisearch_types::milli::constants::VERSION_MAJOR;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
@@ -233,6 +233,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features: opt.to_instance_features(),
|
||||
auto_upgrade: opt.experimental_dumpless_upgrade,
|
||||
embedding_cache_cap: opt.experimental_embedding_cache_entries,
|
||||
};
|
||||
let bin_major: u32 = VERSION_MAJOR.parse().unwrap();
|
||||
let bin_minor: u32 = VERSION_MINOR.parse().unwrap();
|
||||
@@ -335,9 +336,12 @@ fn open_or_create_database_unchecked(
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
|
||||
let index_scheduler_builder =
|
||||
|| -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) };
|
||||
std::fs::create_dir_all(&index_scheduler_opt.auth_path)?;
|
||||
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
|
||||
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
|
||||
};
|
||||
|
||||
match (
|
||||
index_scheduler_builder(),
|
||||
@@ -420,6 +424,7 @@ pub fn update_version_file_for_dumpless_upgrade(
|
||||
if from_major == 1 && from_minor == 12 {
|
||||
let env = unsafe {
|
||||
heed::EnvOpenOptions::new()
|
||||
.read_txn_without_tls()
|
||||
.max_dbs(Versioning::nb_db())
|
||||
.map_size(index_scheduler_opt.task_db_size)
|
||||
.open(&index_scheduler_opt.tasks_path)
|
||||
|
||||
@@ -63,7 +63,8 @@ const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
|
||||
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
|
||||
|
||||
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
@@ -446,6 +447,14 @@ pub struct Opt {
|
||||
#[serde(default = "default_limit_batched_tasks_total_size")]
|
||||
pub experimental_limit_batched_tasks_total_size: u64,
|
||||
|
||||
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
|
||||
/// distinct embedder.
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/818>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES, default_value_t = default_embedding_cache_entries())]
|
||||
#[serde(default = "default_embedding_cache_entries")]
|
||||
pub experimental_embedding_cache_entries: usize,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -549,6 +558,7 @@ impl Opt {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_limit_batched_tasks_total_size,
|
||||
experimental_embedding_cache_entries,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -641,6 +651,10 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
experimental_limit_batched_tasks_total_size.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
|
||||
experimental_embedding_cache_entries.to_string(),
|
||||
);
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
|
||||
@@ -948,6 +962,10 @@ fn default_limit_batched_tasks_total_size() -> u64 {
|
||||
u64::MAX
|
||||
}
|
||||
|
||||
fn default_embedding_cache_entries() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn default_snapshot_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub network: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub get_task_documents_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub composite_embedders: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
contains_filter: Some(contains_filter),
|
||||
network: Some(network),
|
||||
get_task_documents_route: Some(get_task_documents_route),
|
||||
composite_embedders: Some(composite_embedders),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: bool,
|
||||
network: bool,
|
||||
get_task_documents_route: bool,
|
||||
composite_embedders: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: new.contains_filter,
|
||||
network: new.network,
|
||||
get_task_documents_route: new.get_task_documents_route,
|
||||
composite_embedders: new.composite_embedders,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -202,6 +210,10 @@ async fn patch_features(
|
||||
.0
|
||||
.get_task_documents_route
|
||||
.unwrap_or(old_features.get_task_documents_route),
|
||||
composite_embedders: new_features
|
||||
.0
|
||||
.composite_embedders
|
||||
.unwrap_or(old_features.composite_embedders),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@@ -214,6 +226,7 @@ async fn patch_features(
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@@ -224,6 +237,7 @@ async fn patch_features(
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
@@ -20,11 +20,13 @@ use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use meilisearch_types::star_or::OptionStarOrList;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::{milli, Document, Index};
|
||||
use mime::Mime;
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use tempfile::tempfile;
|
||||
@@ -43,7 +45,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
};
|
||||
use crate::search::{parse_filter, RetrieveVectors};
|
||||
use crate::search::{parse_filter, ExternalDocumentId, RetrieveVectors};
|
||||
use crate::{aggregate_methods, Opt};
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
@@ -137,6 +139,9 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
|
||||
#[serde(rename = "vector.retrieve_vectors")]
|
||||
retrieve_vectors: bool,
|
||||
|
||||
// maximum size of `ids` array. 0 if always empty or `null`
|
||||
max_document_ids: usize,
|
||||
|
||||
// pagination
|
||||
#[serde(rename = "pagination.max_limit")]
|
||||
max_limit: usize,
|
||||
@@ -149,7 +154,7 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum DocumentFetchKind {
|
||||
PerDocumentId { retrieve_vectors: bool },
|
||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
|
||||
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
|
||||
}
|
||||
|
||||
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
|
||||
@@ -161,12 +166,18 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
|
||||
}
|
||||
};
|
||||
|
||||
let ids = match query {
|
||||
DocumentFetchKind::Normal { ids, .. } => *ids,
|
||||
DocumentFetchKind::PerDocumentId { .. } => 0,
|
||||
};
|
||||
|
||||
Self {
|
||||
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
|
||||
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
|
||||
max_limit: limit,
|
||||
max_offset: offset,
|
||||
retrieve_vectors,
|
||||
max_document_ids: ids,
|
||||
|
||||
marker: PhantomData,
|
||||
}
|
||||
@@ -185,6 +196,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
|
||||
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
|
||||
max_limit: self.max_limit.max(new.max_limit),
|
||||
max_offset: self.max_offset.max(new.max_offset),
|
||||
max_document_ids: self.max_document_ids.max(new.max_document_ids),
|
||||
marker: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -266,6 +278,7 @@ pub async fn get_document(
|
||||
per_filter: false,
|
||||
max_limit: 0,
|
||||
max_offset: 0,
|
||||
max_document_ids: 0,
|
||||
marker: PhantomData,
|
||||
},
|
||||
&req,
|
||||
@@ -387,6 +400,9 @@ pub struct BrowseQueryGet {
|
||||
#[param(default, value_type = Option<bool>)]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[param(default, value_type = Option<Vec<String>>)]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentIds>)]
|
||||
ids: Option<CS<String>>,
|
||||
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||
filter: Option<String>,
|
||||
@@ -408,6 +424,9 @@ pub struct BrowseQuery {
|
||||
#[schema(default, example = true)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: bool,
|
||||
#[schema(value_type = Option<Vec<String>>, example = json!(["cody", "finn", "brandy", "gambit"]))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentIds>)]
|
||||
ids: Option<Vec<serde_json::Value>>,
|
||||
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
filter: Option<Value>,
|
||||
@@ -479,6 +498,7 @@ pub async fn documents_by_query_post(
|
||||
retrieve_vectors: body.retrieve_vectors,
|
||||
max_limit: body.limit,
|
||||
max_offset: body.offset,
|
||||
max_document_ids: body.ids.as_ref().map(Vec::len).unwrap_or_default(),
|
||||
per_document_id: false,
|
||||
marker: PhantomData,
|
||||
},
|
||||
@@ -551,7 +571,8 @@ pub async fn get_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
|
||||
params.into_inner();
|
||||
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
@@ -561,12 +582,15 @@ pub async fn get_documents(
|
||||
None => None,
|
||||
};
|
||||
|
||||
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
|
||||
|
||||
let query = BrowseQuery {
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
fields: fields.merge_star_and_none(),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
filter,
|
||||
ids,
|
||||
};
|
||||
|
||||
analytics.publish(
|
||||
@@ -575,6 +599,7 @@ pub async fn get_documents(
|
||||
retrieve_vectors: query.retrieve_vectors,
|
||||
max_limit: query.limit,
|
||||
max_offset: query.offset,
|
||||
max_document_ids: query.ids.as_ref().map(Vec::len).unwrap_or_default(),
|
||||
per_document_id: false,
|
||||
marker: PhantomData,
|
||||
},
|
||||
@@ -590,15 +615,30 @@ fn documents_by_query(
    query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;

    let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);

    let ids = if let Some(ids) = ids {
        let mut parsed_ids = Vec::with_capacity(ids.len());
        for (index, id) in ids.into_iter().enumerate() {
            let id = id.try_into().map_err(|error| {
                let msg = format!("In `.ids[{index}]`: {error}");
                ResponseError::from_msg(msg, Code::InvalidDocumentIds)
            })?;
            parsed_ids.push(id)
        }
        Some(parsed_ids)
    } else {
        None
    };

    let index = index_scheduler.index(&index_uid)?;
    let (total, documents) = retrieve_documents(
        &index,
        offset,
        limit,
        ids,
        filter,
        fields,
        retrieve_vectors,
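Illustrative sketch (not part of the diff): accepting a JSON `ids` array whose entries may be strings or numbers, and reporting the failing position as `.ids[{index}]` like the handler above. The validity rule below is simplified and hypothetical; the real check is Meilisearch's document-id validation.

use serde_json::{json, Value};

// Sketch only: turn heterogeneous JSON ids into strings, pointing at the offending
// array index on failure.
fn parse_ids(ids: Vec<Value>) -> Result<Vec<String>, String> {
    let mut parsed = Vec::with_capacity(ids.len());
    for (index, id) in ids.into_iter().enumerate() {
        let id = match id {
            Value::Number(n) => n.to_string(),
            Value::String(s)
                if !s.is_empty()
                    && s.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') =>
            {
                s
            }
            other => return Err(format!("In `.ids[{index}]`: invalid document identifier {other}")),
        };
        parsed.push(id);
    }
    Ok(parsed)
}

fn main() {
    assert_eq!(parse_ids(vec![json!("0"), json!(1)]).unwrap(), vec!["0", "1"]);
    assert!(parse_ids(vec![json!("0"), json!("illegal/docid")]).is_err());
}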
|
||||
@@ -1451,10 +1491,12 @@ fn some_documents<'a, 't: 'a>(
    }))
}

#[allow(clippy::too_many_arguments)]
fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
    ids: Option<Vec<ExternalDocumentId>>,
    filter: Option<Value>,
    attributes_to_retrieve: Option<Vec<S>>,
    retrieve_vectors: RetrieveVectors,
@@ -1468,16 +1510,28 @@ fn retrieve_documents<S: AsRef<str>>(
        None
    };

    let candidates = if let Some(filter) = filter {
        filter.evaluate(&rtxn, index).map_err(|err| match err {
    let mut candidates = if let Some(ids) = ids {
        let external_document_ids = index.external_documents_ids();
        let mut candidates = RoaringBitmap::new();
        for id in ids.iter() {
            let Some(docid) = external_document_ids.get(&rtxn, id)? else {
                continue;
            };
            candidates.insert(docid);
        }
        candidates
    } else {
        index.documents_ids(&rtxn)?
    };

    if let Some(filter) = filter {
        candidates &= filter.evaluate(&rtxn, index).map_err(|err| match err {
            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
            }
            e => e.into(),
        })?
    } else {
        index.documents_ids(&rtxn)?
    };
    }

    let (it, number_of_documents) = {
        let number_of_documents = candidates.len();
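Illustrative sketch (not part of the diff): the candidate-set shape used above, where an optional id list seeds a RoaringBitmap and an optional filter then narrows it with `&=`. The HashMap here is a stand-in for the index's external-to-internal id mapping, and unknown ids are skipped rather than treated as errors.

use std::collections::HashMap;
use roaring::RoaringBitmap;

// Sketch only: ids (if any) seed the candidates, otherwise every document is a
// candidate; a filter (if any) is applied afterwards as an intersection.
fn candidates(
    all_docids: &RoaringBitmap,
    external_ids: &HashMap<&str, u32>,
    requested_ids: Option<&[&str]>,
    filter_matches: Option<&RoaringBitmap>,
) -> RoaringBitmap {
    let mut candidates = match requested_ids {
        Some(ids) => {
            let mut set = RoaringBitmap::new();
            for id in ids {
                // unknown external ids are simply skipped
                if let Some(&docid) = external_ids.get(id) {
                    set.insert(docid);
                }
            }
            set
        }
        None => all_docids.clone(),
    };
    if let Some(filter) = filter_matches {
        candidates &= filter;
    }
    candidates
}

fn main() {
    let all: RoaringBitmap = (0..4).collect();
    let external: HashMap<&str, u32> = HashMap::from([("a", 0), ("b", 1), ("c", 2)]);
    let blue: RoaringBitmap = [1u32, 2].into_iter().collect();
    let result = candidates(&all, &external, Some(&["a", "c", "missing"]), Some(&blue));
    assert_eq!(result.iter().collect::<Vec<u32>>(), vec![2]);
}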
|
||||
|
||||
@@ -68,6 +68,8 @@ pub struct FacetSearchQuery {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchExhaustiveFacetCount>, default)]
|
||||
pub exhaustive_facet_count: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -98,6 +100,7 @@ impl FacetSearchAggregator {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
exhaustive_facet_count,
|
||||
} = query;
|
||||
|
||||
Self {
|
||||
@@ -110,7 +113,8 @@ impl FacetSearchAggregator {
|
||||
|| attributes_to_search_on.is_some()
|
||||
|| hybrid.is_some()
|
||||
|| ranking_score_threshold.is_some()
|
||||
|| locales.is_some(),
|
||||
|| locales.is_some()
|
||||
|| exhaustive_facet_count.is_some(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@@ -293,13 +297,24 @@ impl From<FacetSearchQuery> for SearchQuery {
            hybrid,
            ranking_score_threshold,
            locales,
            exhaustive_facet_count,
        } = value;

        // If exhaustive_facet_count is true, we need to set the page to 0
        // because the facet search is not exhaustive by default.
        let page = if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
            // setting the page to 0 will force the search to be exhaustive when computing the number of hits,
            // but it will skip the bucket sort saving time.
            Some(0)
        } else {
            None
        };

        SearchQuery {
            q,
            offset: DEFAULT_SEARCH_OFFSET(),
            limit: DEFAULT_SEARCH_LIMIT(),
            page: None,
            page,
            hits_per_page: None,
            attributes_to_retrieve: None,
            retrieve_vectors: false,
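Illustrative sketch (not part of the diff): the `exhaustiveFacetCount` translation above in isolation, where asking for exhaustive counts maps to `page = Some(0)` so the hit count is computed exactly while the bucket sort is skipped. The struct is a stand-in for the real SearchQuery.

// Sketch only: an explicit `page` (even 0) switches hit counting to exhaustive mode,
// so `exhaustive_facet_count: Some(true)` becomes `page: Some(0)`.
#[derive(Debug, PartialEq)]
struct QueryPage {
    page: Option<usize>,
}

fn page_for(exhaustive_facet_count: Option<bool>) -> QueryPage {
    let page = if exhaustive_facet_count.unwrap_or(false) { Some(0) } else { None };
    QueryPage { page }
}

fn main() {
    assert_eq!(page_for(Some(true)), QueryPage { page: Some(0) });
    assert_eq!(page_for(None), QueryPage { page: None });
}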
|
||||
|
||||
@@ -716,7 +716,30 @@ pub async fn delete_all(
|
||||
|
||||
fn validate_settings(
|
||||
settings: Settings<Unchecked>,
|
||||
_index_scheduler: &IndexScheduler,
|
||||
index_scheduler: &IndexScheduler,
|
||||
) -> Result<Settings<Unchecked>, ResponseError> {
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
if let Setting::Set(embedders) = &settings.embedders {
|
||||
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
|
||||
let Setting::Set(embedder) = embedder else {
|
||||
continue;
|
||||
};
|
||||
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
|
||||
features.check_composite_embedders("using `\"composite\"` as source")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.search_embedder, Setting::Set(_)) {
|
||||
features.check_composite_embedders("setting `searchEmbedder`")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
|
||||
features.check_composite_embedders("setting `indexingEmbedder`")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(settings.validate()?)
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{ErrorCode as _, ResponseError};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
@@ -111,7 +111,7 @@ pub async fn similar_get(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.0.try_into()?;
|
||||
let query = params.0.into();
|
||||
|
||||
let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
|
||||
|
||||
@@ -295,10 +295,8 @@ impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
type Error = ResponseError;
|
||||
|
||||
fn try_from(
|
||||
impl From<SimilarQueryGet> for SimilarQuery {
|
||||
fn from(
|
||||
SimilarQueryGet {
|
||||
id,
|
||||
offset,
|
||||
@@ -311,7 +309,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
}: SimilarQueryGet,
|
||||
) -> Result<Self, Self::Error> {
|
||||
) -> Self {
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
@@ -320,10 +318,8 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(SimilarQuery {
|
||||
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
|
||||
ResponseError::from_msg(code.to_string(), code.error_code())
|
||||
})?,
|
||||
SimilarQuery {
|
||||
id: serde_json::Value::String(id.0),
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
filter,
|
||||
@@ -333,6 +329,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
show_ranking_score: show_ranking_score.0,
|
||||
show_ranking_score_details: show_ranking_score_details.0,
|
||||
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -340,7 +340,8 @@ impl SearchKind {
|
||||
vector_len: Option<usize>,
|
||||
route: Route,
|
||||
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let embedder_configs = index.embedding_configs(&rtxn)?;
|
||||
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
|
||||
|
||||
let (embedder, _, quantized) = embedders
|
||||
@@ -635,7 +636,7 @@ impl SearchQueryWithIndex {
|
||||
pub struct SimilarQuery {
|
||||
#[deserr(error = DeserrJsonError<InvalidSimilarId>)]
|
||||
#[schema(value_type = String)]
|
||||
pub id: ExternalDocumentId,
|
||||
pub id: serde_json::Value,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
|
||||
@@ -657,8 +658,7 @@ pub struct SimilarQuery {
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Deserr)]
|
||||
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ExternalDocumentId(String);
|
||||
|
||||
impl AsRef<str> for ExternalDocumentId {
|
||||
@@ -674,7 +674,7 @@ impl ExternalDocumentId {
|
||||
}
|
||||
|
||||
impl TryFrom<String> for ExternalDocumentId {
|
||||
type Error = InvalidSimilarId;
|
||||
type Error = milli::UserError;
|
||||
|
||||
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||
serde_json::Value::String(value).try_into()
|
||||
@@ -682,10 +682,10 @@ impl TryFrom<String> for ExternalDocumentId {
|
||||
}
|
||||
|
||||
impl TryFrom<Value> for ExternalDocumentId {
|
||||
type Error = InvalidSimilarId;
|
||||
type Error = milli::UserError;
|
||||
|
||||
fn try_from(value: Value) -> Result<Self, Self::Error> {
|
||||
Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
|
||||
Ok(Self(milli::documents::validate_document_id_value(value)?))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -916,7 +916,7 @@ fn prepare_search<'t>(
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
|
||||
|
||||
embedder
|
||||
.embed_search(query.q.clone().unwrap(), Some(deadline))
|
||||
.embed_search(query.q.as_ref().unwrap(), Some(deadline))
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
}
|
||||
@@ -1598,6 +1598,11 @@ pub fn perform_similar(
|
||||
ranking_score_threshold,
|
||||
} = query;
|
||||
|
||||
let id: ExternalDocumentId = id.try_into().map_err(|error| {
|
||||
let msg = format!("Invalid value at `.id`: {error}");
|
||||
ResponseError::from_msg(msg, Code::InvalidSimilarId)
|
||||
})?;
|
||||
|
||||
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
|
||||
// preventing a use-after-move
|
||||
let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
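Illustrative sketch (not part of the diff): the let-else pattern used above, returning early so the borrow checker can see that the looked-up id is only consumed on the error path. The map and types are stand-ins for the real external-documents-ids lookup.

use std::collections::HashMap;

// Sketch only: `let ... else` returns early, so `id` is still available to build the
// error message without a use-after-move.
fn internal_id(external: &HashMap<String, u32>, id: String) -> Result<u32, String> {
    let Some(&docid) = external.get(&id) else {
        return Err(format!("Document `{id}` not found"));
    };
    Ok(docid)
}

fn main() {
    let map = HashMap::from([("a".to_string(), 7u32)]);
    assert_eq!(internal_id(&map, "a".to_string()), Ok(7));
    assert!(internal_id(&map, "b".to_string()).is_err());
}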
|
||||
|
||||
@@ -411,7 +411,7 @@ impl<State> Index<'_, State> {
|
||||
self.service.get(url).await
|
||||
}
|
||||
|
||||
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
|
||||
pub async fn fetch_documents(&self, payload: Value) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
|
||||
self.service.post(url, payload).await
|
||||
}
|
||||
|
||||
@@ -667,7 +667,7 @@ async fn fetch_document_by_filter() {
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!(null)).await;
|
||||
let (response, code) = index.fetch_documents(json!(null)).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -678,7 +678,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "offset": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -689,7 +689,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "limit": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -700,7 +700,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "fields": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -711,7 +711,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "filter": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -722,7 +722,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -733,8 +733,7 @@ async fn fetch_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) =
|
||||
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
@@ -762,8 +761,7 @@ async fn retrieve_vectors() {
|
||||
"###);
|
||||
|
||||
// FETCH ALL DOCUMENTS BY POST
|
||||
let (response, _code) =
|
||||
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
|
||||
let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await;
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
|
||||
|
||||
@@ -371,7 +371,7 @@ async fn get_document_by_filter() {
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({})).await;
|
||||
let (response, code) = index.fetch_documents(json!({})).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
@@ -401,7 +401,7 @@ async fn get_document_by_filter() {
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
|
||||
let (response, code) = index.fetch_documents(json!({ "filter": "color = blue" })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
@@ -424,9 +424,8 @@ async fn get_document_by_filter() {
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
|
||||
.await;
|
||||
let (response, code) =
|
||||
index.fetch_documents(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })).await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@@ -446,9 +445,7 @@ async fn get_document_by_filter() {
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(
|
||||
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
|
||||
)
|
||||
.fetch_documents(json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }))
|
||||
.await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
|
||||
@@ -471,7 +468,7 @@ async fn get_document_by_filter() {
|
||||
// Now testing more complex filter that the get route can't represent
|
||||
|
||||
let (response, code) =
|
||||
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
|
||||
index.fetch_documents(json!({ "filter": [["color = blue", "color = red"]] })).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
@@ -495,9 +492,8 @@ async fn get_document_by_filter() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
|
||||
.await;
|
||||
let (response, code) =
|
||||
index.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"] })).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
@@ -514,6 +510,326 @@ async fn get_document_by_filter() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_by_ids() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({
|
||||
"ids": ["0", 1, 2, 3]
|
||||
}))
|
||||
.await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=0,1,2,3").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 4
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index.fetch_documents(json!({ "ids": [2, "1"] })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=2,1").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 1,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) =
|
||||
index.fetch_documents(json!({ "offset": 1, "limit": 1, "ids": ["0", 0, 3] })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=3,0&offset=1&limit=1").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 1,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) =
|
||||
index.fetch_documents(json!({ "limit": 1, "ids": [0, 3], "fields": ["color"] })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?limit=1&ids=0,3&fields=color").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
// Now testing more complex requests that the get route can't represent
|
||||
|
||||
let (response, code) = index.fetch_documents(json!({ "ids": [] })).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 0
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_invalid_ids() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"message": "In `.ids[1]`: Document identifier `\"illegal/docid\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||
"code": "invalid_document_ids",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_ids"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_not_found_ids() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await;
|
||||
// the document with id 42 is not in the results since it doesn't exist
|
||||
// however, no error is raised
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_by_ids_and_filter() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
let (task, _code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) =
|
||||
index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?ids=2&filter=color=blue").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.fetch_documents(
|
||||
json!({ "offset": 1, "limit": 1, "ids": [0, 1, 2, 3], "filter": "color != blue" }),
|
||||
)
|
||||
.await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?ids=0,1,2,3&filter=color!=blue&offset=1&limit=1").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 1,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({ "limit": 1, "ids": [0, 1, 2,3], "filter": "color != blue", "fields": ["color"] }))
|
||||
.await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?ids=0,1,2,3&limit=1&filter=color!=blue&fields=color").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
// Now testing more complex filter that the get route can't represent
|
||||
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({ "ids": [0, "2"], "filter": [["color = blue", "color = red"]] }))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"], "ids": [1, 2, 3] }))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 0
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_with_vectors() {
|
||||
let server = Server::new().await;
|
||||
|
||||
@@ -2132,7 +2132,8 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2254,7 +2255,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2358,7 +2360,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -37,7 +38,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -51,7 +53,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -66,7 +69,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -81,7 +85,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -138,14 +144,6 @@ async fn experimental_feature_metrics() {
|
||||
let (response, code) = server.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response, @"null");
|
||||
|
||||
// startup without flag respects persisted metrics value
|
||||
let disable_metrics =
|
||||
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
|
||||
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
|
||||
let (response, code) = server_no_flag.get_metrics().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response, @"null");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -158,7 +156,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
||||
@@ -615,3 +615,336 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
        },
    ).await;
}

#[actix_rt::test]
async fn distinct_facet_search_on_movies() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = json!([
        { "id": 1, "title": "Carol", "genres": ["Romance", "Drama", "Blob"], "color": "crimson" },
        { "id": 2, "title": "Wonder Woman", "genres": ["Action", "Adventure", "Blob"], "color": "emerald" },
        { "id": 3, "title": "Life of Pi", "genres": ["Adventure", "Drama", "Blob"], "color": "azure" },
        { "id": 4, "title": "Mad Max: Fury Road", "genres": ["Adventure", "Science Fiction", "Blob"], "color": "scarlet" },
        { "id": 5, "title": "Moana", "genres": ["Fantasy", "Action", "Blob"], "color": "coral" },
        { "id": 6, "title": "Philadelphia", "genres": ["Drama", "Blob"], "color": "navy" },
        { "id": 7, "title": "The Matrix", "genres": ["Science Fiction", "Action", "Blob"], "color": "onyx" },
        { "id": 8, "title": "Inception", "genres": ["Science Fiction", "Thriller", "Blob"], "color": "cerulean" },
        { "id": 9, "title": "The Shawshank Redemption", "genres": ["Drama", "Blob"], "color": "slate" },
        { "id": 10, "title": "Pulp Fiction", "genres": ["Crime", "Drama", "Blob"], "color": "gold" },
        { "id": 11, "title": "The Dark Knight", "genres": ["Action", "Crime", "Blob"], "color": "obsidian" },
        { "id": 12, "title": "Forrest Gump", "genres": ["Drama", "Romance", "Blob"], "color": "jade" },
        { "id": 13, "title": "The Godfather", "genres": ["Crime", "Drama", "Blob"], "color": "sepia" },
        { "id": 14, "title": "Fight Club", "genres": ["Drama", "Thriller", "Blob"], "color": "ruby" },
        { "id": 15, "title": "Goodfellas", "genres": ["Crime", "Biography", "Blob"], "color": "charcoal" },
        { "id": 16, "title": "The Silence of the Lambs", "genres": ["Crime", "Thriller", "Blob"], "color": "amethyst" },
        { "id": 17, "title": "Schindler's List", "genres": ["Biography", "Drama", "Blob"], "color": "ebony" },
        { "id": 18, "title": "The Lord of the Rings", "genres": ["Adventure", "Fantasy", "Blob"], "color": "forest" },
        { "id": 19, "title": "Star Wars", "genres": ["Science Fiction", "Adventure", "Blob"], "color": "amber" },
        { "id": 20, "title": "Jurassic Park", "genres": ["Adventure", "Science Fiction", "Blob"], "color": "lime" },
        { "id": 21, "title": "Titanic", "genres": ["Drama", "Romance", "Blob"], "color": "sapphire" },
        { "id": 22, "title": "The Avengers", "genres": ["Action", "Science Fiction", "Blob"], "color": "burgundy" },
        { "id": 23, "title": "Avatar", "genres": ["Science Fiction", "Adventure", "Blob"], "color": "turquoise" },
        { "id": 24, "title": "The Green Mile", "genres": ["Crime", "Fantasy", "Blob"], "color": "emerald" },
        { "id": 25, "title": "Gladiator", "genres": ["Action", "Drama", "Blob"], "color": "sepia" },
        { "id": 26, "title": "The Departed", "genres": ["Crime", "Thriller", "Blob"], "color": "crimson" },
        { "id": 27, "title": "Saving Private Ryan", "genres": ["Drama", "War", "Blob"], "color": "slate" },
        { "id": 28, "title": "Interstellar", "genres": ["Science Fiction", "Adventure", "Blob"], "color": "azure" },
        { "id": 29, "title": "The Pianist", "genres": ["Biography", "Drama", "Blob"], "color": "onyx" },
        { "id": 30, "title": "The Usual Suspects", "genres": ["Crime", "Mystery", "Blob"], "color": "charcoal" },
        { "id": 31, "title": "The Sixth Sense", "genres": ["Mystery", "Thriller", "Blob"], "color": "amethyst" },
        { "id": 32, "title": "The Princess Bride", "genres": ["Adventure", "Romance", "Blob"], "color": "ruby" },
        { "id": 33, "title": "Blade Runner", "genres": ["Science Fiction", "Noir", "Blob"], "color": "sapphire" },
        { "id": 34, "title": "The Big Lebowski", "genres": ["Comedy", "Crime", "Blob"], "color": "gold" },
        { "id": 35, "title": "Good Will Hunting", "genres": ["Drama", "Romance", "Blob"], "color": "turquoise" },
        { "id": 36, "title": "The Terminator", "genres": ["Action", "Science Fiction", "Blob"], "color": "obsidian" },
        { "id": 37, "title": "Casablanca", "genres": ["Drama", "Romance", "Blob"], "color": "jade" },
        { "id": 38, "title": "The Exorcist", "genres": ["Horror", "Thriller", "Blob"], "color": "burgundy" },
        { "id": 39, "title": "Apocalypse Now", "genres": ["Drama", "War", "Blob"], "color": "forest" },
        { "id": 40, "title": "Back to the Future", "genres": ["Adventure", "Comedy", "Blob"], "color": "amber" },
        { "id": 41, "title": "The Graduate", "genres": ["Comedy", "Drama", "Blob"], "color": "azure" },
        { "id": 42, "title": "Alien", "genres": ["Horror", "Science Fiction", "Blob"], "color": "obsidian" },
        { "id": 43, "title": "The Breakfast Club", "genres": ["Drama", "Comedy", "Blob"], "color": "coral" },
        { "id": 44, "title": "Die Hard", "genres": ["Action", "Thriller", "Blob"], "color": "scarlet" },
        { "id": 45, "title": "The Sound of Music", "genres": ["Drama", "Musical", "Blob"], "color": "emerald" },
        { "id": 46, "title": "Jaws", "genres": ["Horror", "Thriller", "Blob"], "color": "navy" },
        { "id": 47, "title": "Rocky", "genres": ["Drama", "Sport", "Blob"], "color": "burgundy" },
        { "id": 48, "title": "E.T. the Extra-Terrestrial", "genres": ["Adventure", "Science Fiction", "Blob"], "color": "amber" },
        { "id": 49, "title": "The Godfather Part II", "genres": ["Crime", "Drama", "Blob"], "color": "sepia" },
        { "id": 50, "title": "One Flew Over the Cuckoo's Nest", "genres": ["Drama", "Blob"], "color": "slate" }
    ]);
    let (response, code) =
        index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;
    let (response, code) = index.update_settings_distinct_attribute(json!("color")).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;

    let (response, _code) = index.add_documents(documents, None).await;
    index.wait_task(response.uid()).await;

    let (response, code) =
        index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "" })).await;

    // non-exhaustive facet count is counting 27 documents with the facet query "blob" but there are only 23 documents with a distinct color.
    assert_eq!(code, 200, "{}", response);
    snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":27}]"###);

    let (response, code) =
        index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "", "exhaustiveFacetCount": true })).await;

    // exhaustive facet count is counting 23 documents with the facet query "blob" which is the number of distinct colors.
    assert_eq!(code, 200, "{}", response);
    snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":23}]"###);
}
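With a distinct attribute configured, the two facet counts above differ by design: the default count is approximate, while `exhaustiveFacetCount` deduplicates hits by the distinct attribute before counting. A minimal sketch of how a caller picks between the two, reusing the test helpers from this suite (the `count` extraction at the end is illustrative, not part of the test API):

    let exact = true; // false keeps the fast, approximate count
    let (response, code) = index
        .facet_search(json!({
            "facetQuery": "blob",
            "facetName": "genres",
            "q": "",
            "exhaustiveFacetCount": exact,
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    let count = response["facetHits"][0]["count"].as_u64().unwrap_or(0);

With the dataset above, the approximate path reports 27 while the exhaustive one reports 23, one hit per distinct color.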
@@ -55,11 +55,11 @@ async fn similar_bad_id() {
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

-    let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
+    let (response, code) = index.similar_post(json!({"id": ["doggo"], "embedder": "manual"})).await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: Document identifier `[\"doggo\"]` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
      "code": "invalid_similar_id",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_similar_id"

@@ -145,11 +145,12 @@ async fn similar_invalid_id() {
    snapshot!(code, @"202 Accepted");
    server.wait_task(response.uid()).await;

-    let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
+    let (response, code) =
+        index.similar_post(json!({"id": "http://invalid-docid/", "embedder": "manual"})).await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: Document identifier `\"http://invalid-docid/\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
      "code": "invalid_similar_id",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
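Both hunks also add an explicit `"embedder": "manual"` to the similar requests, so the error being asserted stays the bad document id rather than a missing embedder. As a hedged sketch of the happy-path call shape these tests now assume (the document id `1` is illustrative):

    let (response, code) = index
        .similar_post(json!({ "id": 1, "embedder": "manual" }))
        .await;
    assert_eq!(code, 200, "{}", response);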
@@ -43,7 +43,7 @@ async fn version_too_old() {
    std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
    let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
    let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-    snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.3");
+    snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.14.0");
}

#[actix_rt::test]
@@ -58,15 +58,12 @@ async fn version_requires_downgrade() {
    std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
    let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
    let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
-    snapshot!(err, @"Database version 1.13.4 is higher than the Meilisearch version 1.13.3. Downgrade is not supported");
+    snapshot!(err, @"Database version 1.14.1 is higher than the Meilisearch version 1.14.0. Downgrade is not supported");
}

#[actix_rt::test]
async fn upgrade_to_the_current_version() {
    let temp = tempfile::tempdir().unwrap();
    let server = Server::new_with_options(default_settings(temp.path())).await.unwrap();
    drop(server);

    let server = Server::new_with_options(Opt {
        experimental_dumpless_upgrade: true,
        ..default_settings(temp.path())
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "progress": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "stats": {
    "totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "progress": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "stats": {
    "totalNbTasks": 1,

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "progress": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "stats": {
    "totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "canceledBy": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "error": null,
  "duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "canceledBy": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "error": null,
  "duration": "[duration]",

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "canceledBy": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "error": null,
  "duration": "[duration]",

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "progress": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "stats": {
    "totalNbTasks": 1,

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
  "canceledBy": null,
  "details": {
    "upgradeFrom": "v1.12.0",
-    "upgradeTo": "v1.13.3"
+    "upgradeTo": "v1.14.0"
  },
  "error": null,
  "duration": "[duration]",
@@ -108,6 +108,10 @@ async fn check_the_keys(server: &Server) {
/// 5.2. Enqueue a new task
/// 5.3. Create an index
async fn check_the_index_scheduler(server: &Server) {
+    // Wait until the upgrade has been applied to all indexes to avoid flakyness
+    let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
+    server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
+
    // All the indexes are still present
    let (indexes, _) = server.list_indexes(None, None).await;
    snapshot!(indexes, @r#"

@@ -156,10 +160,6 @@ async fn check_the_index_scheduler(server: &Server) {
    }
    "###);

-    // Wait until the upgrade has been applied to all indexes to avoid flakyness
-    let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
-    server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
-
    // Tasks and batches should still work
    // We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
    // The other tasks should NOT change
@@ -1995,7 +1995,7 @@ async fn timeout() {

    let (response, code) = index
        .search_post(json!({
-            "q": "grand chien de berger des montagnes",
+            "q": "grand chien de berger des montagnes foil the cache",
            "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
        }))
        .await;
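Only the query string changes here: search-time embeddings are now memoized per embedder (see the `EmbeddingCache`/`cache_cap` plumbing and the new `lru` dependency further down), so a query already embedded earlier in the test would be served from the cache and never exercise the timeout path. A rough sketch of that caching idea, assumed shape only, not the actual implementation:

    use std::num::NonZeroUsize;
    use std::sync::Mutex;
    use lru::LruCache;

    // Hypothetical: maps the query text to its embedding. A capacity of 0
    // disables the cache entirely, which is what indexing-side embedders get
    // (`cache_cap = 0`) since they never see repeated queries.
    struct EmbeddingCache(Option<Mutex<LruCache<String, Vec<f32>>>>);

    impl EmbeddingCache {
        fn new(cap: usize) -> Self {
            Self(NonZeroUsize::new(cap).map(|cap| Mutex::new(LruCache::new(cap))))
        }
    }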
@@ -412,6 +412,117 @@ async fn ollama_url_checks() {
async fn composite_checks() {
    let server = Server::new().await;
    let index = server.index("test");
    // feature not enabled, using source
    let (response, _code) = index
        .update_settings(json!({ "embedders": { "test": null } }))
        .await;
    server.wait_task(response.uid()).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "test": {
                    "source": "composite",
                    "searchEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" },
                    "indexingEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" },
                }
            }
        }))
        .await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
      "code": "feature_not_enabled",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
    }
    "###);

    // feature not enabled, using search embedder
    let (response, _code) = index
        .update_settings(json!({ "embedders": { "test": null } }))
        .await;
    server.wait_task(response.uid()).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "test": {
                    "source": "userProvided",
                    "searchEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" }
                }
            }
        }))
        .await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
      "code": "feature_not_enabled",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
    }
    "###);

    // feature not enabled, using indexing embedder
    let (response, _code) = index
        .update_settings(json!({ "embedders": { "test": null } }))
        .await;
    server.wait_task(response.uid()).await;

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "test": {
                    "source": "userProvided",
                    "indexingEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" }
                }
            }
        }))
        .await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
      "code": "feature_not_enabled",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
    }
    "###);

    // enable feature
    let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
    snapshot!(code, @"200 OK");

    // inner distribution
    let (response, _code) = index
        .update_settings(json!({
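Once the `compositeEmbedders` feature is on, a valid configuration pairs a search-time sub-embedder with an indexing-time one under a `composite` source. A sketch of an accepted payload, reusing the Hugging Face sub-embedder from the test above (the field names come from the test; the surrounding setup is assumed):

    let (response, code) = index
        .update_settings(json!({
            "embedders": {
                "test": {
                    "source": "composite",
                    "searchEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" },
                    "indexingEmbedder": { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" },
                }
            }
        }))
        .await;
    assert_eq!(202, code, "{:?}", response);
    server.wait_task(response.uid()).await;

Both sub-embedders must produce vectors of the same dimension; the composite `Embedder::new` shown near the end of this diff rejects mismatched dimensions.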
@@ -10,7 +10,6 @@ license.workspace = true

[dependencies]
anyhow = "1.0.95"
-arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
@@ -7,11 +7,11 @@ use anyhow::{bail, Context};
use clap::{Parser, Subcommand, ValueEnum};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
-use meilisearch_auth::AuthController;
+use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
use meilisearch_types::heed::{
-    CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
+    CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
};
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@@ -172,7 +172,7 @@ fn main() -> anyhow::Result<()> {
|
||||
/// Clears the task queue located at `db_path`.
|
||||
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
|
||||
let path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
|
||||
let env = unsafe { EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&path) }
|
||||
.with_context(|| format!("While trying to open {:?}", path.display()))?;
|
||||
|
||||
eprintln!("Deleting tasks from the database...");
|
||||
@@ -225,7 +225,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
fn try_opening_database<KC: 'static, DC: 'static>(
|
||||
env: &Env,
|
||||
env: &Env<WithoutTls>,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<KC, DC>> {
|
||||
@@ -235,7 +235,7 @@ fn try_opening_database<KC: 'static, DC: 'static>(
|
||||
}
|
||||
|
||||
fn try_opening_poly_database(
|
||||
env: &Env,
|
||||
env: &Env<WithoutTls>,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<Unspecified, Unspecified>> {
|
||||
@@ -284,13 +284,18 @@ fn export_a_dump(
|
||||
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
|
||||
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
eprintln!("Dumping the keys...");
|
||||
|
||||
// 2. dump the keys
|
||||
let auth_store = AuthController::new(&db_path, &None)
|
||||
let auth_path = db_path.join("auth");
|
||||
std::fs::create_dir_all(&auth_path).context("While creating the auth directory")?;
|
||||
let auth_env = open_auth_store_env(&auth_path).context("While opening the auth store")?;
|
||||
let auth_store = AuthController::new(auth_env, &None)
|
||||
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
let mut count = 0;
|
||||
@@ -386,9 +391,10 @@ fn export_a_dump(
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
|
||||
.with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
@@ -438,8 +444,10 @@ fn export_a_dump(
|
||||
|
||||
fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
@@ -456,9 +464,10 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
|
||||
.with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
eprintln!("Awaiting for a mutable transaction...");
|
||||
let _wtxn = index.write_txn().context("While awaiting for a write transaction")?;
|
||||
@@ -470,7 +479,7 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
|
||||
eprintln!("Compacting the index...");
|
||||
let before_compaction = Instant::now();
|
||||
let new_file = index
|
||||
.copy_to_file(&compacted_index_file_path, CompactionOption::Enabled)
|
||||
.copy_to_path(&compacted_index_file_path, CompactionOption::Enabled)
|
||||
.with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
|
||||
|
||||
let after_size = new_file.metadata()?.len();
|
||||
@@ -514,8 +523,10 @@ fn export_documents(
|
||||
offset: Option<usize>,
|
||||
) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
@@ -526,9 +537,10 @@ fn export_documents(
|
||||
if uid == index_name {
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index =
|
||||
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
|
||||
.with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
@@ -616,8 +628,10 @@ fn hair_dryer(
|
||||
index_parts: &[IndexPart],
|
||||
) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
eprintln!("Trying to get a read transaction on the index scheduler...");
|
||||
|
||||
@@ -630,9 +644,10 @@ fn hair_dryer(
|
||||
if index_names.iter().any(|i| i == uid) {
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index =
|
||||
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
|
||||
.with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
eprintln!("Trying to get a read transaction on the {uid} index...");
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@ use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
|
||||
use meilisearch_types::heed::{
|
||||
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
|
||||
};
|
||||
use meilisearch_types::milli::index::{db_name, main_key};
|
||||
|
||||
use super::v1_9;
|
||||
@@ -92,7 +94,7 @@ fn update_index_stats(
|
||||
|
||||
fn update_date_format(
|
||||
index_uid: &str,
|
||||
index_env: &Env,
|
||||
index_env: &Env<WithoutTls>,
|
||||
index_wtxn: &mut RwTxn,
|
||||
) -> anyhow::Result<()> {
|
||||
let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
|
||||
@@ -106,7 +108,7 @@ fn update_date_format(
|
||||
|
||||
fn find_rest_embedders(
|
||||
index_uid: &str,
|
||||
index_env: &Env,
|
||||
index_env: &Env<WithoutTls>,
|
||||
index_txn: &RoTxn,
|
||||
) -> anyhow::Result<Vec<String>> {
|
||||
let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
|
||||
@@ -164,8 +166,10 @@ pub fn v1_9_to_v1_10(
|
||||
// 2. REST embedders. We don't support this case right now, so bail
|
||||
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let mut sched_wtxn = env.write_txn()?;
|
||||
|
||||
@@ -205,9 +209,13 @@ pub fn v1_9_to_v1_10(
|
||||
|
||||
let index_env = unsafe {
|
||||
// FIXME: fetch the 25 magic number from the index file
|
||||
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
EnvOpenOptions::new()
|
||||
.read_txn_without_tls()
|
||||
.max_dbs(25)
|
||||
.open(&index_path)
|
||||
.with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
};
|
||||
|
||||
let index_txn = index_env.read_txn().with_context(|| {
|
||||
@@ -252,9 +260,13 @@ pub fn v1_9_to_v1_10(
|
||||
|
||||
let index_env = unsafe {
|
||||
// FIXME: fetch the 25 magic number from the index file
|
||||
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
EnvOpenOptions::new()
|
||||
.read_txn_without_tls()
|
||||
.max_dbs(25)
|
||||
.open(&index_path)
|
||||
.with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
};
|
||||
|
||||
let mut index_wtxn = index_env.write_txn().with_context(|| {
|
||||
|
||||
@@ -23,8 +23,10 @@ pub fn v1_10_to_v1_11(
|
||||
println!("Upgrading from v1.10.0 to v1.11.0");
|
||||
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let sched_rtxn = env.read_txn()?;
|
||||
|
||||
@@ -50,9 +52,13 @@ pub fn v1_10_to_v1_11(
|
||||
);
|
||||
|
||||
let index_env = unsafe {
|
||||
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
EnvOpenOptions::new()
|
||||
.read_txn_without_tls()
|
||||
.max_dbs(25)
|
||||
.open(&index_path)
|
||||
.with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?
|
||||
};
|
||||
|
||||
let index_rtxn = index_env.read_txn().with_context(|| {
|
||||
@@ -76,11 +82,11 @@ pub fn v1_10_to_v1_11(
|
||||
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
|
||||
.with_context(|| format!("while updating date format for index `{uid}`"))?;
|
||||
|
||||
arroy_v04_to_v05::ugrade_from_prev_version(
|
||||
meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5(
|
||||
&index_rtxn,
|
||||
index_read_database,
|
||||
index_read_database.remap_types(),
|
||||
&mut index_wtxn,
|
||||
index_write_database,
|
||||
index_write_database.remap_types(),
|
||||
)?;
|
||||
|
||||
index_wtxn.commit()?;
|
||||
|
||||
@@ -115,8 +115,10 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
|
||||
/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
|
||||
fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
let env = unsafe {
|
||||
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
|
||||
}
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let mut sched_wtxn = env.write_txn()?;
|
||||
|
||||
@@ -173,11 +175,12 @@ fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
|
||||
|
||||
println!("\t- Rebuilding field distribution");
|
||||
|
||||
let index =
|
||||
meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path, false)
|
||||
.with_context(|| {
|
||||
format!("while opening index {uid} at '{}'", index_path.display())
|
||||
})?;
|
||||
let index = meilisearch_types::milli::Index::new(
|
||||
EnvOpenOptions::new().read_txn_without_tls(),
|
||||
&index_path,
|
||||
false,
|
||||
)
|
||||
.with_context(|| format!("while opening index {uid} at '{}'", index_path.display()))?;
|
||||
|
||||
let mut index_txn = index.write_txn()?;
|
||||
|
||||
|
||||
@@ -28,11 +28,13 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
-grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
-heed = { version = "0.20.5", default-features = false, features = [
+grenad = { version = "0.5.0", default-features = false, features = [
+    "rayon",
+    "tempfile",
+] }
+heed = { version = "0.22.0", default-features = false, features = [
    "serde-json",
    "serde-bincode",
+    "read-txn-no-tls",
] }
indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }

@@ -85,7 +87,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
    "no_time",
    "sync",
] }
-arroy = "0.5.0"
+arroy = "0.6.1"
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

@@ -101,7 +103,14 @@ uell = "0.1.0"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }
-utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
+utoipa = { version = "5.3.1", features = [
+    "non_strict_integers",
+    "preserve_order",
+    "uuid",
+    "time",
+    "openapi_extensions",
+] }
+lru = "0.13.0"

[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }

@@ -113,9 +122,7 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }

[features]
-all-tokenizations = [
-    "charabia/default",
-]
+all-tokenizations = ["charabia/default"]

# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
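The heed 0.22 bump with the `read-txn-no-tls` feature is what drives the `WithoutTls` changes threaded through the rest of this diff: read transactions are no longer tied to thread-local storage, and the choice is carried in the types as `EnvOpenOptions<WithoutTls>` / `Env<WithoutTls>` / `RoTxn<'_, WithoutTls>`. A compressed sketch of the call pattern the diff uses everywhere (the path and error handling are illustrative):

    use meilisearch_types::heed::{Env, EnvOpenOptions, WithoutTls};

    fn open_tasks_env(path: &std::path::Path) -> anyhow::Result<Env<WithoutTls>> {
        // read_txn_without_tls() converts the builder; the Env it opens, and every
        // RoTxn that Env hands out, is tagged WithoutTls from then on.
        let env = unsafe { EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(path)? };
        Ok(env)
    }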
@@ -80,9 +80,13 @@ impl DocumentsBatchIndex {
    let mut map = Object::new();

    for (k, v) in document.iter() {
-        // TODO: TAMO: update the error type
-        let key =
-            self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone();
+        let key = self
+            .0
+            .get_by_left(&k)
+            .ok_or(crate::error::InternalError::FieldIdMapMissingEntry(
+                FieldIdMapMissingEntry::FieldId { field_id: k, process: "recreate_json" },
+            ))?
+            .clone();
        let value = serde_json::from_slice::<serde_json::Value>(v)
            .map_err(crate::error::InternalError::SerdeJson)?;
        map.insert(key, value);
@@ -33,8 +33,6 @@ pub enum Error {

#[derive(Error, Debug)]
pub enum InternalError {
-    #[error("{}", HeedError::DatabaseClosing)]
-    DatabaseClosing,
    #[error("missing {} in the {db_name} database", key.unwrap_or("key"))]
    DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
    #[error("missing {key} in the fieldids weights mapping")]

@@ -197,8 +195,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
        valid_fields: BTreeSet<String>,
        hidden_fields: bool,
    },
-    #[error("an environment is already opened with different options")]
-    InvalidLmdbOpenOptions,
+    #[error("An LMDB environment is already opened")]
+    EnvAlreadyOpened,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
    SortRankingRuleMissing,
    #[error("The database file is in an invalid state.")]

@@ -362,7 +360,8 @@ impl From<arroy::Error> for Error {
            | arroy::Error::UnmatchingDistance { .. }
            | arroy::Error::NeedBuild(_)
            | arroy::Error::MissingKey { .. }
-            | arroy::Error::MissingMetadata(_) => {
+            | arroy::Error::MissingMetadata(_)
+            | arroy::Error::CannotDecodeKeyMode { .. } => {
                Error::InternalError(InternalError::ArroyError(value))
            }
        }

@@ -516,8 +515,7 @@ impl From<HeedError> for Error {
            // TODO use the encoding
            HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
            HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
-            HeedError::DatabaseClosing => InternalError(DatabaseClosing),
-            HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
+            HeedError::EnvAlreadyOpened { .. } => UserError(EnvAlreadyOpened),
        }
    }
}
@@ -3,7 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::path::Path;

-use heed::types::*;
+use heed::{types::*, WithoutTls};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;

@@ -110,7 +110,7 @@ pub mod db_name {
#[derive(Clone)]
pub struct Index {
    /// The LMDB environment which this index is associated with.
-    pub(crate) env: heed::Env,
+    pub(crate) env: heed::Env<WithoutTls>,

    /// Contains many different types (e.g. the fields ids map).
    pub(crate) main: Database<Unspecified, Unspecified>,

@@ -177,7 +177,7 @@ pub struct Index {

impl Index {
    pub fn new_with_creation_dates<P: AsRef<Path>>(
-        mut options: heed::EnvOpenOptions,
+        mut options: heed::EnvOpenOptions<WithoutTls>,
        path: P,
        created_at: time::OffsetDateTime,
        updated_at: time::OffsetDateTime,

@@ -275,7 +275,7 @@ impl Index {
    }

    pub fn new<P: AsRef<Path>>(
-        options: heed::EnvOpenOptions,
+        options: heed::EnvOpenOptions<WithoutTls>,
        path: P,
        creation: bool,
    ) -> Result<Index> {

@@ -284,7 +284,7 @@ impl Index {
    }

    fn set_creation_dates(
-        env: &heed::Env,
+        env: &heed::Env<WithoutTls>,
        main: Database<Unspecified, Unspecified>,
        created_at: time::OffsetDateTime,
        updated_at: time::OffsetDateTime,

@@ -306,12 +306,12 @@ impl Index {
    }

    /// Create a read transaction to be able to read the index.
-    pub fn read_txn(&self) -> heed::Result<RoTxn<'_>> {
+    pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> {
        self.env.read_txn()
    }

    /// Create a static read transaction to be able to read the index without keeping a reference to it.
-    pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
+    pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> {
        self.env.clone().static_read_txn()
    }

@@ -340,8 +340,12 @@ impl Index {
        self.env.info().map_size
    }

-    pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
-        self.env.copy_to_file(path, option).map_err(Into::into)
+    pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
+        self.env.copy_to_file(file, option).map_err(Into::into)
    }

+    pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
+        self.env.copy_to_path(path, option).map_err(Into::into)
+    }
+
    /// Returns an `EnvClosingEvent` that can be used to wait for the closing event,

@@ -1825,7 +1829,8 @@ pub(crate) mod tests {
impl TempIndex {
    /// Creates a temporary index
    pub fn new_with_map_size(size: usize) -> Self {
-        let mut options = EnvOpenOptions::new();
+        let options = EnvOpenOptions::new();
+        let mut options = options.read_txn_without_tls();
        options.map_size(size);
        let _tempdir = TempDir::new_in(".").unwrap();
        let inner = Index::new(options, _tempdir.path(), true).unwrap();
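The compaction API is split in two here: the path-taking behaviour moves to `copy_to_path`, while `copy_to_file` now writes into a file handle the caller already owns. A small sketch of the path-based variant, which is what `meilitool compact-index` switches to elsewhere in this diff; the wrapper function itself is illustrative:

    use std::fs::File;
    use std::path::Path;
    use meilisearch_types::heed::CompactionOption;
    use meilisearch_types::milli::Index;

    fn compact_index_to(index: &Index, dst: &Path) -> anyhow::Result<File> {
        // CompactionOption::Enabled writes a compacted copy of the environment
        // (free pages dropped) rather than a raw byte-for-byte copy.
        Ok(index.copy_to_path(dst, CompactionOption::Enabled)?)
    }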
@@ -83,6 +83,8 @@ pub use self::search::{
|
||||
};
|
||||
pub use self::update::ChannelCongestion;
|
||||
|
||||
pub use arroy;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
pub type Attribute = u32;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use enum_iterator::Sequence;
|
||||
use std::any::TypeId;
|
||||
use std::borrow::Cow;
|
||||
use std::marker::PhantomData;
|
||||
@@ -76,6 +77,14 @@ impl Progress {
|
||||
|
||||
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
|
||||
}
|
||||
|
||||
// TODO: ideally we should expose the progress in a way that let arroy use it directly
|
||||
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
|
||||
self.update_progress(progress.main);
|
||||
if let Some(sub) = progress.sub {
|
||||
self.update_progress(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate the names associated with the durations and push them.
|
||||
@@ -238,3 +247,44 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
|
||||
self.total
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for arroy::MainStep {
|
||||
fn name(&self) -> Cow<'static, str> {
|
||||
match self {
|
||||
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
|
||||
arroy::MainStep::WritingTheDescendantsAndMetadata => {
|
||||
"writing the descendants and metadata"
|
||||
}
|
||||
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
|
||||
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
|
||||
arroy::MainStep::UpdatingTheTrees => "updating the trees",
|
||||
arroy::MainStep::CreateNewTrees => "create new trees",
|
||||
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
|
||||
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
|
||||
arroy::MainStep::WriteTheMetadata => "write the metadata",
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
fn current(&self) -> u32 {
|
||||
*self as u32
|
||||
}
|
||||
|
||||
fn total(&self) -> u32 {
|
||||
Self::CARDINALITY as u32
|
||||
}
|
||||
}
|
||||
|
||||
impl Step for arroy::SubStep {
|
||||
fn name(&self) -> Cow<'static, str> {
|
||||
self.unit.into()
|
||||
}
|
||||
|
||||
fn current(&self) -> u32 {
|
||||
self.current.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn total(&self) -> u32 {
|
||||
self.max
|
||||
}
|
||||
}
|
||||
|
||||
@@ -203,7 +203,7 @@ impl<'a> Search<'a> {
|
||||
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
|
||||
|
||||
match embedder.embed_search(query, Some(deadline)) {
|
||||
match embedder.embed_search(&query, Some(deadline)) {
|
||||
Ok(embedding) => embedding,
|
||||
Err(error) => {
|
||||
tracing::error!(error=%error, "Embedding failed");
|
||||
|
||||
@@ -15,7 +15,8 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
|
||||
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path, true).unwrap();
|
||||
|
||||
|
||||
@@ -352,7 +352,7 @@ pub(crate) mod test_helpers {
|
||||
|
||||
use grenad::MergerBuilder;
|
||||
use heed::types::Bytes;
|
||||
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
|
||||
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::bulk::FacetsUpdateBulkInner;
|
||||
@@ -390,7 +390,7 @@ pub(crate) mod test_helpers {
|
||||
for<'a> BoundCodec:
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
pub env: Env,
|
||||
pub env: Env<WithoutTls>,
|
||||
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
pub group_size: Cell<u8>,
|
||||
pub min_level_size: Cell<u8>,
|
||||
@@ -412,7 +412,8 @@ pub(crate) mod test_helpers {
|
||||
let group_size = group_size.clamp(2, 127);
|
||||
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
||||
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
let options = heed::EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
let options = options.map_size(4096 * 4 * 1000 * 100);
|
||||
let tempdir = tempfile::TempDir::new().unwrap();
|
||||
let env = unsafe { options.open(tempdir.path()) }.unwrap();
|
||||
|
||||
@@ -31,6 +31,7 @@ use super::new::StdResult;
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{PrefixSearch, PrefixSettings};
|
||||
use crate::progress::Progress;
|
||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||
use crate::update::{
|
||||
@@ -520,7 +521,16 @@ where
|
||||
|
||||
pool.install(|| {
|
||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||
writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
|
||||
writer.build_and_quantize(
|
||||
wtxn,
|
||||
// In the settings we don't have any progress to share
|
||||
&Progress::default(),
|
||||
&mut rng,
|
||||
dimension,
|
||||
is_quantizing,
|
||||
self.indexer_config.max_memory,
|
||||
cancel,
|
||||
)?;
|
||||
Result::Ok(())
|
||||
})
|
||||
.map_err(InternalError::from)??;
|
||||
@@ -2799,8 +2809,9 @@ mod tests {
|
||||
embedding_configs.pop().unwrap();
|
||||
insta::assert_snapshot!(embedder_name, @"manual");
|
||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
|
||||
let embedder =
|
||||
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
|
||||
let embedder = std::sync::Arc::new(
|
||||
crate::vector::Embedder::new(embedder.embedder_options, 0).unwrap(),
|
||||
);
|
||||
let res = index
|
||||
.search(&rtxn)
|
||||
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::sync::atomic::Ordering;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use bumpalo::Bump;
|
||||
use heed::RoTxn;
|
||||
use heed::{RoTxn, WithoutTls};
|
||||
use rayon::iter::IndexedParallelIterator;
|
||||
|
||||
use super::super::document_change::DocumentChange;
|
||||
@@ -28,7 +28,7 @@ pub struct DocumentChangeContext<
|
||||
/// inside of the DB.
|
||||
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
||||
/// A transaction providing data from the DB before all indexing operations
|
||||
pub rtxn: RoTxn<'indexer>,
|
||||
pub rtxn: RoTxn<'indexer, WithoutTls>,
|
||||
|
||||
/// Global field id map that is up to date with the current state of the indexing process.
|
||||
///
|
||||
|
||||
@@ -62,6 +62,8 @@ where
|
||||
let mut bbbuffers = Vec::new();
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
|
||||
let arroy_memory = grenad_parameters.max_memory;
|
||||
|
||||
// We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch
|
||||
// is because we still use the old indexer for the settings and it is highly impacted by the
|
||||
// max memory. So we keep the changes here and will remove these changes once we use the new
|
||||
@@ -199,7 +201,9 @@ where
|
||||
build_vectors(
|
||||
index,
|
||||
wtxn,
|
||||
indexing_context.progress,
|
||||
index_embeddings,
|
||||
arroy_memory,
|
||||
&mut arroy_writers,
|
||||
&indexing_context.must_stop_processing,
|
||||
)
|
||||
|
||||
@@ -78,7 +78,7 @@ fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelt
|
||||
EitherOrBoth::Both(lhs, rhs) => {
|
||||
let (word, lhs_bytes) = lhs?;
|
||||
let (_, rhs_bytes) = rhs?;
|
||||
if lhs_bytes != rhs_bytes || modified_fid_docids_databases(index, wtxn, word)? {
|
||||
if lhs_bytes != rhs_bytes {
|
||||
word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
|
||||
}
|
||||
}
|
||||
@@ -107,43 +107,6 @@ fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelt
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare the fid docids databases for a given word
|
||||
/// and register the changes in the word fst builder if there is any difference
|
||||
fn modified_fid_docids_databases(index: &Index, wtxn: &RwTxn, word: &str) -> Result<bool> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let previous_words =
|
||||
index.word_fid_docids.remap_types::<Bytes, Bytes>().prefix_iter(&rtxn, word.as_bytes())?;
|
||||
let current_words =
|
||||
index.word_fid_docids.remap_types::<Bytes, Bytes>().prefix_iter(wtxn, word.as_bytes())?;
|
||||
|
||||
for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) {
|
||||
(Ok((l, _)), Ok((r, _))) => l.cmp(r),
|
||||
(Err(_), _) | (_, Err(_)) => Ordering::Equal,
|
||||
}) {
|
||||
match eob {
|
||||
EitherOrBoth::Both(lhs, rhs) => {
|
||||
let (_key_bytes, lhs_bytes) = lhs?;
|
||||
let (_, rhs_bytes) = rhs?;
|
||||
|
||||
if lhs_bytes != rhs_bytes {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
EitherOrBoth::Left(result) => {
|
||||
let (_key_bytes, _) = result?;
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
EitherOrBoth::Right(result) => {
|
||||
let (_key_bytes, _) = result?;
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")]
|
||||
fn compute_facet_search_database(
|
||||
index: &Index,
|
||||
|
||||
@@ -10,6 +10,7 @@ use super::super::channel::*;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::Progress;
|
||||
use crate::update::settings::InnerIndexSettings;
|
||||
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
||||
use crate::{Error, Index, InternalError, Result};
|
||||
@@ -100,7 +101,9 @@ impl ChannelCongestion {
|
||||
pub fn build_vectors<MSP>(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
progress: &Progress,
|
||||
index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||
arroy_memory: Option<usize>,
|
||||
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||
must_stop_processing: &MSP,
|
||||
) -> Result<()>
|
||||
@@ -111,10 +114,19 @@ where
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
let seed = rand::random();
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
||||
for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers {
|
||||
let dimensions = *dimensions;
|
||||
writer.build_and_quantize(wtxn, &mut rng, dimensions, false, must_stop_processing)?;
|
||||
writer.build_and_quantize(
|
||||
wtxn,
|
||||
progress,
|
||||
&mut rng,
|
||||
dimensions,
|
||||
false,
|
||||
arroy_memory,
|
||||
must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
index.put_embedding_configs(wtxn, index_embeddings)?;
|
||||
|
||||
@@ -205,15 +205,22 @@ impl WordPrefixIntegerDocids {
|
||||
let (ref mut index, ref mut file, ref mut buffer) = *refmut;
|
||||
|
||||
for (&pos, bitmaps_bytes) in frozen.bitmaps(prefix).unwrap() {
|
||||
let output = bitmaps_bytes
|
||||
.iter()
|
||||
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
|
||||
.union()?;
|
||||
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
|
||||
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
|
||||
file.write_all(buffer)?;
|
||||
if bitmaps_bytes.is_empty() {
|
||||
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: None });
|
||||
} else {
|
||||
let output = bitmaps_bytes
|
||||
.iter()
|
||||
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
|
||||
.union()?;
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
|
||||
index.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: Some(buffer.len()),
|
||||
});
|
||||
file.write_all(buffer)?;
|
||||
}
|
||||
}
|
||||
|
||||
Result::Ok(())
|
||||
@@ -230,14 +237,24 @@ impl WordPrefixIntegerDocids {
|
||||
file.rewind()?;
|
||||
let mut file = BufReader::new(file);
|
||||
for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
|
||||
buffer.resize(serialized_length, 0);
|
||||
file.read_exact(&mut buffer)?;
|
||||
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(prefix.as_bytes());
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(&pos.to_be_bytes());
|
||||
self.prefix_database.remap_data_type::<Bytes>().put(wtxn, &key_buffer, &buffer)?;
|
||||
match serialized_length {
|
||||
Some(serialized_length) => {
|
||||
buffer.resize(serialized_length, 0);
|
||||
file.read_exact(&mut buffer)?;
|
||||
self.prefix_database.remap_data_type::<Bytes>().put(
|
||||
wtxn,
|
||||
&key_buffer,
|
||||
&buffer,
|
||||
)?;
|
||||
}
|
||||
None => {
|
||||
self.prefix_database.delete(wtxn, &key_buffer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,7 +266,7 @@ impl WordPrefixIntegerDocids {
|
||||
struct PrefixIntegerEntry<'a> {
|
||||
prefix: &'a str,
|
||||
pos: u16,
|
||||
serialized_length: usize,
|
||||
serialized_length: Option<usize>,
|
||||
}
|
||||
|
||||
/// TODO doc
|
||||
|
||||
@@ -1628,7 +1628,8 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
// cache_cap: no cache needed for indexing purposes
|
||||
Embedder::new(embedder_options.clone(), 0)
|
||||
.map_err(crate::vector::Error::from)
|
||||
.map_err(crate::Error::from)?,
|
||||
);
|
||||
|
||||
@@ -1,15 +1,17 @@
mod v1_12;
mod v1_13;
+mod v1_14;

use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
-use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Current};
+use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
+use v1_14::Latest_V1_13_To_Latest_V1_14;

use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};

trait UpgradeIndex {
-    /// Returns true if the index scheduler must regenerate its cached stats
+    /// Returns `true` if the index scheduler must regenerate its cached stats.
    fn upgrade(
        &self,
        wtxn: &mut RwTxn,

@@ -32,15 +34,17 @@ pub fn upgrade(
        &V1_12_To_V1_12_3 {},
        &V1_12_3_To_V1_13_0 {},
        &V1_13_0_To_V1_13_1 {},
-        &V1_13_1_To_Current {},
+        &V1_13_1_To_Latest_V1_13 {},
+        &Latest_V1_13_To_Latest_V1_14 {},
    ];

    let start = match from {
        (1, 12, 0..=2) => 0,
        (1, 12, 3..) => 1,
        (1, 13, 0) => 2,
-        (1, 13, _) => 4,
+        // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
+        (1, 13, _) => 3,
+        (1, 14, _) => 4,
        (major, minor, patch) => {
            return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
        }

@@ -50,7 +54,6 @@ pub fn upgrade(
    let upgrade_path = &upgrade_functions[start..];

    let mut current_version = from;
-
    let mut regenerate_stats = false;
    for (i, upgrade) in upgrade_path.iter().enumerate() {
        let target = upgrade.target_version();

@@ -37,9 +37,9 @@ impl UpgradeIndex for V1_13_0_To_V1_13_1 {
}

#[allow(non_camel_case_types)]
-pub(super) struct V1_13_1_To_Current();
+pub(super) struct V1_13_1_To_Latest_V1_13();

-impl UpgradeIndex for V1_13_1_To_Current {
+impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
    fn upgrade(
        &self,
        _wtxn: &mut RwTxn,
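Read against the `upgrade_functions` array above, the `start` index decides how much of the chain a given database runs; keeping a `(1, 14, _)` arm means a run that failed half-way can be resumed on the same binary. A worked reading of the match, assuming the five steps listed above:

    // (1, 12, 1) -> start = 0: run all five steps, v1.12.x all the way to v1.14.0
    // (1, 13, 0) -> start = 2: re-apply the v1.13.0 -> v1.13.1 fixups first
    // (1, 13, 3) -> start = 3: only the "latest v1.13" step and the new v1.14 step run
    // (1, 14, 0) -> start = 4: only Latest_V1_13_To_Latest_V1_14 runs again, so indexes
    //               missed by an interrupted upgrade still get the arroy migration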
41
crates/milli/src/update/upgrade/v1_14.rs
Normal file
41
crates/milli/src/update/upgrade/v1_14.rs
Normal file
@@ -0,0 +1,41 @@
use arroy::distances::Cosine;
use heed::RwTxn;

use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};

#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_13_To_Latest_V1_14();

impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
    fn upgrade(
        &self,
        wtxn: &mut RwTxn,
        index: &Index,
        _original: (u32, u32, u32),
        progress: Progress,
    ) -> Result<bool> {
        make_enum_progress! {
            enum VectorStore {
                UpdateInternalVersions,
            }
        };

        progress.update_progress(VectorStore::UpdateInternalVersions);

        let rtxn = index.read_txn()?;
        arroy::upgrade::from_0_5_to_0_6::<Cosine>(
            &rtxn,
            index.vector_arroy.remap_data_type(),
            wtxn,
            index.vector_arroy.remap_data_type(),
        )?;

        Ok(false)
    }

    fn target_version(&self) -> (u32, u32, u32) {
        (1, 14, 0)
    }
}

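Any further migration would presumably follow the same shape as Latest_V1_13_To_Latest_V1_14: one struct per step, registered in the chain in upgrade/mod.rs. A hypothetical skeleton, with a simplified stand-in for the UpgradeIndex trait shown above:

// Hypothetical sketch (not part of this PR); the trait here is a reduced
// stand-in for the real UpgradeIndex signature.
#[allow(non_camel_case_types)]
struct Latest_V1_14_To_V1_15();

trait UpgradeIndexSketch {
    fn upgrade(&self) -> bool;
    fn target_version(&self) -> (u32, u32, u32);
}

impl UpgradeIndexSketch for Latest_V1_14_To_V1_15 {
    fn upgrade(&self) -> bool {
        // Perform the migration work here; return whether the index scheduler
        // must regenerate its cached stats.
        false
    }

    fn target_version(&self) -> (u32, u32, u32) {
        (1, 15, 0)
    }
}
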
@@ -4,7 +4,8 @@ use arroy::Distance;

use super::error::CompositeEmbedderContainsHuggingFace;
use super::{
    hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, NewEmbedderError,
    hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, EmbeddingCache,
    NewEmbedderError,
};
use crate::ThreadPoolNoAbort;

@@ -58,9 +59,11 @@ pub struct EmbedderOptions {
impl Embedder {
    pub fn new(
        EmbedderOptions { search, index }: EmbedderOptions,
        cache_cap: usize,
    ) -> Result<Self, NewEmbedderError> {
        let search = SubEmbedder::new(search)?;
        let index = SubEmbedder::new(index)?;
        let search = SubEmbedder::new(search, cache_cap)?;
        // cache is only used at search
        let index = SubEmbedder::new(index, 0)?;

        // check dimensions
        if search.dimensions() != index.dimensions() {

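Only the search half of a composite embedder receives the cache budget; the indexing half passes 0 because documents are embedded once and never looked up again by text. A toy sketch of that split, with illustrative names only:

// Self-contained sketch of the split above; names are illustrative, not the crate's.
struct SubCache {
    capacity: usize,
}

struct CompositePair {
    search: SubCache,
    index: SubCache,
}

fn new_pair(cache_cap: usize) -> CompositePair {
    CompositePair {
        search: SubCache { capacity: cache_cap }, // cache only pays off for repeated search queries
        index: SubCache { capacity: 0 },          // indexing embeds each text once
    }
}
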
@@ -118,19 +121,28 @@ impl Embedder {
}

impl SubEmbedder {
    pub fn new(options: SubEmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
    pub fn new(
        options: SubEmbedderOptions,
        cache_cap: usize,
    ) -> std::result::Result<Self, NewEmbedderError> {
        Ok(match options {
            SubEmbedderOptions::HuggingFace(options) => {
                Self::HuggingFace(hf::Embedder::new(options)?)
                Self::HuggingFace(hf::Embedder::new(options, cache_cap)?)
            }
            SubEmbedderOptions::OpenAi(options) => {
                Self::OpenAi(openai::Embedder::new(options, cache_cap)?)
            }
            SubEmbedderOptions::Ollama(options) => {
                Self::Ollama(ollama::Embedder::new(options, cache_cap)?)
            }
            SubEmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
            SubEmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
            SubEmbedderOptions::UserProvided(options) => {
                Self::UserProvided(manual::Embedder::new(options))
            }
            SubEmbedderOptions::Rest(options) => {
                Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
            }
            SubEmbedderOptions::Rest(options) => Self::Rest(rest::Embedder::new(
                options,
                cache_cap,
                rest::ConfigurationSource::User,
            )?),
        })
    }

@@ -148,6 +160,27 @@ impl SubEmbedder {
        }
    }

    pub fn embed_one(
        &self,
        text: &str,
        deadline: Option<Instant>,
    ) -> std::result::Result<Embedding, EmbedError> {
        match self {
            SubEmbedder::HuggingFace(embedder) => embedder.embed_one(text),
            SubEmbedder::OpenAi(embedder) => {
                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
            }
            SubEmbedder::Ollama(embedder) => {
                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
            }
            SubEmbedder::UserProvided(embedder) => embedder.embed_one(text),
            SubEmbedder::Rest(embedder) => embedder
                .embed_ref(&[text], deadline)?
                .pop()
                .ok_or_else(EmbedError::missing_embedding),
        }
    }

    /// Embed multiple chunks of texts.
    ///
    /// Each chunk is composed of one or multiple texts.

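embed_one reduces every backend to a single embedding by popping the lone result and converting an empty response into a missing-embedding error. A minimal sketch of that pop-or-error step, with Vec<f32> standing in for the crate's Embedding type:

#[derive(Debug)]
struct MissingEmbedding;

// A request for one text should yield exactly one embedding; an empty batch is an error.
fn single_embedding(mut batch: Vec<Vec<f32>>) -> Result<Vec<f32>, MissingEmbedding> {
    batch.pop().ok_or(MissingEmbedding)
}

fn main() {
    assert!(single_embedding(vec![]).is_err());
    assert_eq!(single_embedding(vec![vec![0.1, 0.2]]).unwrap(), vec![0.1, 0.2]);
}
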
@@ -233,6 +266,16 @@ impl SubEmbedder {
            SubEmbedder::Rest(embedder) => embedder.distribution(),
        }
    }

    pub(super) fn cache(&self) -> Option<&EmbeddingCache> {
        match self {
            SubEmbedder::HuggingFace(embedder) => Some(embedder.cache()),
            SubEmbedder::OpenAi(embedder) => Some(embedder.cache()),
            SubEmbedder::UserProvided(_) => None,
            SubEmbedder::Ollama(embedder) => Some(embedder.cache()),
            SubEmbedder::Rest(embedder) => Some(embedder.cache()),
        }
    }
}

fn check_similarity(

@@ -7,7 +7,7 @@ use hf_hub::{Repo, RepoType};
use tokenizers::{PaddingParams, Tokenizer};

pub use super::error::{EmbedError, Error, NewEmbedderError};
use super::{DistributionShift, Embedding};
use super::{DistributionShift, Embedding, EmbeddingCache};

#[derive(
    Debug,

@@ -84,6 +84,7 @@ pub struct Embedder {
    options: EmbedderOptions,
    dimensions: usize,
    pooling: Pooling,
    cache: EmbeddingCache,
}

impl std::fmt::Debug for Embedder {

@@ -149,7 +150,10 @@ impl From<PoolingConfig> for Pooling {
}

impl Embedder {
    pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
    pub fn new(
        options: EmbedderOptions,
        cache_cap: usize,
    ) -> std::result::Result<Self, NewEmbedderError> {
        let device = match candle_core::Device::cuda_if_available(0) {
            Ok(device) => device,
            Err(error) => {

@@ -245,7 +249,14 @@ impl Embedder {
            tokenizer.with_padding(Some(pp));
        }

        let mut this = Self { model, tokenizer, options, dimensions: 0, pooling };
        let mut this = Self {
            model,
            tokenizer,
            options,
            dimensions: 0,
            pooling,
            cache: EmbeddingCache::new(cache_cap),
        };

        let embeddings = this
            .embed(vec!["test".into()])

@@ -355,4 +366,8 @@ impl Embedder {
    pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
        texts.iter().map(|text| self.embed_one(text)).collect()
    }

    pub(super) fn cache(&self) -> &EmbeddingCache {
        &self.cache
    }
}

@@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::num::NonZeroUsize;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use arroy::distances::{BinaryQuantizedCosine, Cosine};

@@ -12,6 +13,7 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

use self::error::{EmbedError, NewEmbedderError};
use crate::progress::Progress;
use crate::prompt::{Prompt, PromptData};
use crate::ThreadPoolNoAbort;

@@ -80,12 +82,15 @@ impl ArroyWrapper {
        }
    }

    #[allow(clippy::too_many_arguments)]
    pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
        &mut self,
        wtxn: &mut RwTxn,
        progress: &Progress,
        rng: &mut R,
        dimension: usize,
        quantizing: bool,
        arroy_memory: Option<usize>,
        cancel: &(impl Fn() -> bool + Sync + Send),
    ) -> Result<(), arroy::Error> {
        for index in arroy_db_range_for_embedder(self.embedder_index) {

@@ -105,9 +110,19 @@ impl ArroyWrapper {
            // sensitive.
            if quantizing && !self.quantized {
                let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
                writer.builder(rng).cancel(cancel).build(wtxn)?;
                writer
                    .builder(rng)
                    .available_memory(arroy_memory.unwrap_or(usize::MAX))
                    .progress(|step| progress.update_progress_from_arroy(step))
                    .cancel(cancel)
                    .build(wtxn)?;
            } else if writer.need_build(wtxn)? {
                writer.builder(rng).cancel(cancel).build(wtxn)?;
                writer
                    .builder(rng)
                    .available_memory(arroy_memory.unwrap_or(usize::MAX))
                    .progress(|step| progress.update_progress_from_arroy(step))
                    .cancel(cancel)
                    .build(wtxn)?;
            } else if writer.is_empty(wtxn)? {
                break;
            }

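build_and_quantize now threads a memory budget, a progress callback and a cancellation check through the builder before calling build. The sketch below imitates that builder style with made-up types; it is not the arroy API, only the calling pattern:

// Illustrative builder sketch: optional memory budget, progress reporting and
// cooperative cancellation are all configured before build() runs.
struct Builder {
    available_memory: usize,
}

impl Builder {
    fn new() -> Self {
        Self { available_memory: usize::MAX }
    }

    fn available_memory(mut self, bytes: usize) -> Self {
        self.available_memory = bytes;
        self
    }

    fn build(self, mut progress: impl FnMut(u32), cancel: impl Fn() -> bool) -> Result<(), &'static str> {
        for step in 0..10 {
            if cancel() {
                return Err("cancelled");
            }
            progress(step);
        }
        Ok(())
    }
}

fn main() {
    // Mirrors `arroy_memory.unwrap_or(usize::MAX)` above: no budget means "use everything".
    let memory: Option<usize> = None;
    Builder::new()
        .available_memory(memory.unwrap_or(usize::MAX))
        .build(|_step| {}, || false)
        .unwrap();
}
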
@@ -542,6 +557,46 @@ pub enum Embedder {
    Composite(composite::Embedder),
}

#[derive(Debug)]
struct EmbeddingCache {
    data: Option<Mutex<lru::LruCache<String, Embedding>>>,
}

impl EmbeddingCache {
    const MAX_TEXT_LEN: usize = 2000;

    pub fn new(cap: usize) -> Self {
        let data = NonZeroUsize::new(cap).map(lru::LruCache::new).map(Mutex::new);
        Self { data }
    }

    /// Get the embedding corresponding to `text`, if any is present in the cache.
    pub fn get(&self, text: &str) -> Option<Embedding> {
        let data = self.data.as_ref()?;
        if text.len() > Self::MAX_TEXT_LEN {
            return None;
        }
        let mut cache = data.lock().unwrap();

        cache.get(text).cloned()
    }

    /// Puts a new embedding for the specified `text`
    pub fn put(&self, text: String, embedding: Embedding) {
        let Some(data) = self.data.as_ref() else {
            return;
        };
        if text.len() > Self::MAX_TEXT_LEN {
            return;
        }
        tracing::trace!(text, "embedding added to cache");

        let mut cache = data.lock().unwrap();

        cache.put(text, embedding);
    }
}

/// Configuration for an embedder.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct EmbeddingConfig {

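The cache above is deliberately conservative: a capacity of 0 disables it, texts longer than MAX_TEXT_LEN are never stored, and hits are cloned out from behind a mutex. A small usage sketch built on the same lru crate the diff relies on, with Vec<f32> in place of Embedding:

use std::num::NonZeroUsize;
use std::sync::Mutex;

fn main() {
    // Same construction shape as EmbeddingCache::new: a zero capacity would yield None.
    let cache: Option<Mutex<lru::LruCache<String, Vec<f32>>>> =
        NonZeroUsize::new(100).map(lru::LruCache::new).map(Mutex::new);

    let text = "shoes for kids".to_string();
    let mut guard = cache.as_ref().unwrap().lock().unwrap();

    // Miss: the embedding has to be computed (a dummy vector here)...
    assert!(guard.get(&text).is_none());

    // ...then stored, so the next identical search query can skip the embedder call.
    guard.put(text.clone(), vec![0.1, 0.2, 0.3]);
    assert!(guard.get(&text).is_some());
}
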
@@ -620,19 +675,30 @@ impl Default for EmbedderOptions {

impl Embedder {
    /// Spawns a new embedder built from its options.
    pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
    pub fn new(
        options: EmbedderOptions,
        cache_cap: usize,
    ) -> std::result::Result<Self, NewEmbedderError> {
        Ok(match options {
            EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
            EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
            EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
            EmbedderOptions::HuggingFace(options) => {
                Self::HuggingFace(hf::Embedder::new(options, cache_cap)?)
            }
            EmbedderOptions::OpenAi(options) => {
                Self::OpenAi(openai::Embedder::new(options, cache_cap)?)
            }
            EmbedderOptions::Ollama(options) => {
                Self::Ollama(ollama::Embedder::new(options, cache_cap)?)
            }
            EmbedderOptions::UserProvided(options) => {
                Self::UserProvided(manual::Embedder::new(options))
            }
            EmbedderOptions::Rest(options) => {
                Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
            }
            EmbedderOptions::Rest(options) => Self::Rest(rest::Embedder::new(
                options,
                cache_cap,
                rest::ConfigurationSource::User,
            )?),
            EmbedderOptions::Composite(options) => {
                Self::Composite(composite::Embedder::new(options)?)
                Self::Composite(composite::Embedder::new(options, cache_cap)?)
            }
        })
    }

@@ -642,19 +708,35 @@ impl Embedder {
    #[tracing::instrument(level = "debug", skip_all, target = "search")]
    pub fn embed_search(
        &self,
        text: String,
        text: &str,
        deadline: Option<Instant>,
    ) -> std::result::Result<Embedding, EmbedError> {
        let texts = vec![text];
        let mut embedding = match self {
            Embedder::HuggingFace(embedder) => embedder.embed(texts),
            Embedder::OpenAi(embedder) => embedder.embed(&texts, deadline),
            Embedder::Ollama(embedder) => embedder.embed(&texts, deadline),
            Embedder::UserProvided(embedder) => embedder.embed(&texts),
            Embedder::Rest(embedder) => embedder.embed(texts, deadline),
            Embedder::Composite(embedder) => embedder.search.embed(texts, deadline),
        if let Some(cache) = self.cache() {
            if let Some(embedding) = cache.get(text) {
                tracing::trace!(text, "embedding found in cache");
                return Ok(embedding);
            }
        }
        let embedding = match self {
            Embedder::HuggingFace(embedder) => embedder.embed_one(text),
            Embedder::OpenAi(embedder) => {
                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
            }
            Embedder::Ollama(embedder) => {
                embedder.embed(&[text], deadline)?.pop().ok_or_else(EmbedError::missing_embedding)
            }
            Embedder::UserProvided(embedder) => embedder.embed_one(text),
            Embedder::Rest(embedder) => embedder
                .embed_ref(&[text], deadline)?
                .pop()
                .ok_or_else(EmbedError::missing_embedding),
            Embedder::Composite(embedder) => embedder.search.embed_one(text, deadline),
        }?;
        let embedding = embedding.pop().ok_or_else(EmbedError::missing_embedding)?;

        if let Some(cache) = self.cache() {
            cache.put(text.to_owned(), embedding.clone());
        }

        Ok(embedding)
    }

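At search time the flow is read-through: check the cache, embed on a miss, then store the result so the next identical query skips the embedder call. A self-contained sketch of that flow with a plain HashMap standing in for EmbeddingCache:

use std::collections::HashMap;

// Read-through sketch of the embed_search flow above.
fn embed_search(
    cache: &mut HashMap<String, Vec<f32>>,
    text: &str,
    embed: impl Fn(&str) -> Vec<f32>,
) -> Vec<f32> {
    if let Some(hit) = cache.get(text) {
        return hit.clone(); // cache hit: no network or model call
    }
    let embedding = embed(text);
    cache.insert(text.to_owned(), embedding.clone());
    embedding
}

fn main() {
    let mut cache = HashMap::new();
    let calls = std::cell::Cell::new(0);
    let embed = |_t: &str| {
        calls.set(calls.get() + 1);
        vec![0.5; 4]
    };

    embed_search(&mut cache, "red dress", &embed);
    embed_search(&mut cache, "red dress", &embed);
    assert_eq!(calls.get(), 1); // the second identical query is served from the cache
}
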
@@ -750,6 +832,17 @@ impl Embedder {
            Embedder::Composite(embedder) => embedder.index.uses_document_template(),
        }
    }

    fn cache(&self) -> Option<&EmbeddingCache> {
        match self {
            Embedder::HuggingFace(embedder) => Some(embedder.cache()),
            Embedder::OpenAi(embedder) => Some(embedder.cache()),
            Embedder::UserProvided(_) => None,
            Embedder::Ollama(embedder) => Some(embedder.cache()),
            Embedder::Rest(embedder) => Some(embedder.cache()),
            Embedder::Composite(embedder) => embedder.search.cache(),
        }
    }
}

/// Describes the mean and sigma of distribution of embedding similarity in the embedding space.

@@ -5,7 +5,7 @@ use rayon::slice::ParallelSlice as _;

use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
use super::{DistributionShift, REQUEST_PARALLELISM};
use super::{DistributionShift, EmbeddingCache, REQUEST_PARALLELISM};
use crate::error::FaultSource;
use crate::vector::Embedding;
use crate::ThreadPoolNoAbort;

@@ -75,9 +75,10 @@ impl EmbedderOptions {
}

impl Embedder {
    pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
    pub fn new(options: EmbedderOptions, cache_cap: usize) -> Result<Self, NewEmbedderError> {
        let rest_embedder = match RestEmbedder::new(
            options.into_rest_embedder_config()?,
            cache_cap,
            super::rest::ConfigurationSource::Ollama,
        ) {
            Ok(embedder) => embedder,

@@ -182,6 +183,10 @@ impl Embedder {
    pub fn distribution(&self) -> Option<DistributionShift> {
        self.rest_embedder.distribution()
    }

    pub(super) fn cache(&self) -> &EmbeddingCache {
        self.rest_embedder.cache()
    }
}

fn get_ollama_path() -> String {

@@ -7,7 +7,7 @@ use rayon::slice::ParallelSlice as _;

use super::error::{EmbedError, NewEmbedderError};
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
use super::{DistributionShift, REQUEST_PARALLELISM};
use super::{DistributionShift, EmbeddingCache, REQUEST_PARALLELISM};
use crate::error::FaultSource;
use crate::vector::error::EmbedErrorKind;
use crate::vector::Embedding;

@@ -176,7 +176,7 @@ pub struct Embedder {
}

impl Embedder {
    pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
    pub fn new(options: EmbedderOptions, cache_cap: usize) -> Result<Self, NewEmbedderError> {
        let mut inferred_api_key = Default::default();
        let api_key = options.api_key.as_ref().unwrap_or_else(|| {
            inferred_api_key = infer_api_key();

@@ -201,6 +201,7 @@ impl Embedder {
                }),
                headers: Default::default(),
            },
            cache_cap,
            super::rest::ConfigurationSource::OpenAi,
        )?;

@@ -318,6 +319,10 @@ impl Embedder {
    pub fn distribution(&self) -> Option<DistributionShift> {
        self.options.distribution()
    }

    pub(super) fn cache(&self) -> &EmbeddingCache {
        self.rest_embedder.cache()
    }
}

impl fmt::Debug for Embedder {

@@ -9,7 +9,9 @@ use serde::{Deserialize, Serialize};

use super::error::EmbedErrorKind;
use super::json_template::ValueTemplate;
use super::{DistributionShift, EmbedError, Embedding, NewEmbedderError, REQUEST_PARALLELISM};
use super::{
    DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, REQUEST_PARALLELISM,
};
use crate::error::FaultSource;
use crate::ThreadPoolNoAbort;

@@ -75,6 +77,7 @@ pub struct Embedder {
    data: EmbedderData,
    dimensions: usize,
    distribution: Option<DistributionShift>,
    cache: EmbeddingCache,
}

/// All data needed to perform requests and parse responses

@@ -123,6 +126,7 @@ enum InputType {
impl Embedder {
    pub fn new(
        options: EmbedderOptions,
        cache_cap: usize,
        configuration_source: ConfigurationSource,
    ) -> Result<Self, NewEmbedderError> {
        let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));

@@ -152,7 +156,12 @@ impl Embedder {
            infer_dimensions(&data)?
        };

        Ok(Self { data, dimensions, distribution: options.distribution })
        Ok(Self {
            data,
            dimensions,
            distribution: options.distribution,
            cache: EmbeddingCache::new(cache_cap),
        })
    }

    pub fn embed(

@@ -256,6 +265,10 @@ impl Embedder {
    pub fn distribution(&self) -> Option<DistributionShift> {
        self.distribution
    }

    pub(super) fn cache(&self) -> &EmbeddingCache {
        &self.cache
    }
}

fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {

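Each concrete backend now owns an EmbeddingCache (directly, or through its inner REST embedder) and exposes it through a cache() accessor; that is what the enum-level cache() above dispatches on, and user-provided embedders have nothing to cache. A compact sketch of that accessor pattern, with illustrative stand-in types:

// Stand-in types only; this mirrors the dispatch shape, not the crate's API.
struct EmbeddingCacheSketch;

struct RestEmbedderSketch {
    cache: EmbeddingCacheSketch,
}

impl RestEmbedderSketch {
    fn new(_cache_cap: usize) -> Self {
        Self { cache: EmbeddingCacheSketch }
    }

    fn cache(&self) -> &EmbeddingCacheSketch {
        &self.cache
    }
}

enum AnyEmbedder {
    Rest(RestEmbedderSketch),
    UserProvided, // no remote calls, so nothing worth caching
}

impl AnyEmbedder {
    fn cache(&self) -> Option<&EmbeddingCacheSketch> {
        match self {
            AnyEmbedder::Rest(embedder) => Some(embedder.cache()),
            AnyEmbedder::UserProvided => None,
        }
    }
}
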
@@ -12,7 +12,8 @@ use serde_json::{from_value, json};
#[test]
fn test_facet_distribution_with_no_facet_values() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path, true).unwrap();

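These test fixtures all repeat the same environment setup: open EnvOpenOptions, switch to the read_txn_without_tls mode, set the map size, then open the index. A hypothetical shared helper, assuming only the calls visible in these hunks (the use paths and helper name are assumptions):

use milli::heed::EnvOpenOptions;
use milli::Index;

// Hypothetical helper factoring out the setup the fixtures below repeat.
fn tmp_index(map_size: usize) -> Index {
    let path = tempfile::tempdir().unwrap();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(map_size);
    Index::new(options, &path, true).unwrap()
}
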
@@ -34,7 +34,8 @@ pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");

pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path, true).unwrap();

@@ -262,7 +262,8 @@ fn criteria_mixup() {
#[test]
fn criteria_ascdesc() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(12 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path, true).unwrap();

@@ -108,7 +108,8 @@ fn test_typo_tolerance_two_typo() {
#[test]
fn test_typo_disabled_on_word() {
    let tmp = tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(4096 * 100);
    let index = Index::new(options, tmp.path(), true).unwrap();
Block a user