4513: Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1" r=Kerollmops a=irevoire

This reverts commit bd74cce86a, reversing changes made to d2f77e88bd.

This commit wasn’t supposed to be merged into the `release-v1.7.1` branch.


Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
meili-bors[bot]
2024-03-20 09:57:24 +00:00
committed by GitHub
34 changed files with 614 additions and 1788 deletions

View File

@@ -43,4 +43,4 @@ jobs:
- name: Run benchmarks on PR ${{ github.event.issue.id }} - name: Run benchmarks on PR ${{ github.event.issue.id }}
run: | run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }} cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}

View File

@@ -110,25 +110,6 @@ jobs:
--milestone $MILESTONE_VERSION \ --milestone $MILESTONE_VERSION \
--assignee curquiza --assignee curquiza
# Job: open an "Update version in Cargo.toml" issue attached to the milestone.
create-update-version-issue:
needs: get-release-version
# Runs only when the triggering event's action is `created`.
# NOTE(review): an earlier comment here spoke of a "changelog issue" and of
# skipping patch releases; the condition below checks neither - confirm intent.
if: github.event.action == 'created'
runs-on: ubuntu-latest
env:
# Local file name the downloaded issue body is written to.
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
# Fetch the issue body from the engine-team repository (`-s` silences curl progress output).
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
# $MILESTONE_VERSION is not defined in this job; presumably it is set at
# workflow level or derived by `get-release-version` - TODO confirm.
- name: Create the issue
run: |
gh issue create \
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
# ---------------- # ----------------
# MILESTONE CLOSED # MILESTONE CLOSED
# ---------------- # ----------------

View File

@@ -4,7 +4,7 @@ First, thank you for contributing to Meilisearch! The goal of this document is t
Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)... Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)...
Meilisearch can manage multiple indexes, handle the update store, and expose an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/meilisearch/tree/main/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/). The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter. If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter.

195
Cargo.lock generated
View File

@@ -36,9 +36,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-http" name = "actix-http"
version = "3.6.0" version = "3.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f"
dependencies = [ dependencies = [
"actix-codec", "actix-codec",
"actix-rt", "actix-rt",
@@ -138,9 +138,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-tls" name = "actix-tls"
version = "3.3.0" version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f" checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3"
dependencies = [ dependencies = [
"actix-rt", "actix-rt",
"actix-service", "actix-service",
@@ -148,11 +148,13 @@ dependencies = [
"futures-core", "futures-core",
"impl-more", "impl-more",
"pin-project-lite", "pin-project-lite",
"rustls 0.21.6",
"rustls-webpki",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls 0.23.4",
"tokio-util", "tokio-util",
"tracing", "tracing",
"webpki-roots", "webpki-roots 0.22.6",
] ]
[[package]] [[package]]
@@ -167,9 +169,9 @@ dependencies = [
[[package]] [[package]]
name = "actix-web" name = "actix-web"
version = "4.5.1" version = "4.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b"
dependencies = [ dependencies = [
"actix-codec", "actix-codec",
"actix-http", "actix-http",
@@ -257,9 +259,9 @@ dependencies = [
[[package]] [[package]]
name = "ahash" name = "ahash"
version = "0.8.11" version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"getrandom", "getrandom",
@@ -494,7 +496,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@@ -628,7 +630,7 @@ dependencies = [
[[package]] [[package]]
name = "build-info" name = "build-info"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"time", "time",
@@ -833,9 +835,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.83" version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01"
dependencies = [ dependencies = [
"jobserver", "jobserver",
"libc", "libc",
@@ -1529,7 +1531,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -1767,7 +1769,7 @@ dependencies = [
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"faux", "faux",
"tempfile", "tempfile",
@@ -1790,7 +1792,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@@ -1810,7 +1812,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -1928,7 +1930,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"clap", "clap",
@@ -2102,10 +2104,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"js-sys",
"libc", "libc",
"wasi", "wasi",
"wasm-bindgen",
] ]
[[package]] [[package]]
@@ -2224,7 +2224,7 @@ dependencies = [
"atomic-polyfill", "atomic-polyfill",
"hash32", "hash32",
"rustc_version", "rustc_version",
"spin", "spin 0.9.8",
"stable_deref_trait", "stable_deref_trait",
] ]
@@ -2393,9 +2393,9 @@ dependencies = [
"futures-util", "futures-util",
"http 0.2.11", "http 0.2.11",
"hyper", "hyper",
"rustls", "rustls 0.21.6",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls 0.24.1",
] ]
[[package]] [[package]]
@@ -2422,7 +2422,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -2609,7 +2609,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -2617,14 +2617,13 @@ dependencies = [
[[package]] [[package]]
name = "jsonwebtoken" name = "jsonwebtoken"
version = "9.2.0" version = "8.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
dependencies = [ dependencies = [
"base64 0.21.7", "base64 0.21.7",
"js-sys",
"pem", "pem",
"ring", "ring 0.16.20",
"serde", "serde",
"serde_json", "serde_json",
"simple_asn1", "simple_asn1",
@@ -3118,7 +3117,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@@ -3127,7 +3126,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@@ -3185,7 +3184,7 @@ dependencies = [
"rayon", "rayon",
"regex", "regex",
"reqwest", "reqwest",
"rustls", "rustls 0.20.9",
"rustls-pemfile", "rustls-pemfile",
"segment", "segment",
"serde", "serde",
@@ -3220,7 +3219,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"base64 0.21.7", "base64 0.21.7",
"enum-iterator", "enum-iterator",
@@ -3239,7 +3238,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@@ -3269,7 +3268,7 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@@ -3308,7 +3307,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"arroy", "arroy",
"big_s", "big_s",
@@ -3414,9 +3413,9 @@ dependencies = [
[[package]] [[package]]
name = "mio" name = "mio"
version = "0.8.11" version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
dependencies = [ dependencies = [
"libc", "libc",
"log", "log",
@@ -3734,12 +3733,11 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]] [[package]]
name = "pem" name = "pem"
version = "3.0.3" version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8"
dependencies = [ dependencies = [
"base64 0.21.7", "base64 0.13.1",
"serde",
] ]
[[package]] [[package]]
@@ -3750,7 +3748,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",
@@ -4241,14 +4239,14 @@ dependencies = [
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",
"pin-project-lite", "pin-project-lite",
"rustls", "rustls 0.21.6",
"rustls-pemfile", "rustls-pemfile",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"system-configuration", "system-configuration",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls 0.24.1",
"tokio-util", "tokio-util",
"tower-service", "tower-service",
"url", "url",
@@ -4256,7 +4254,7 @@ dependencies = [
"wasm-bindgen-futures", "wasm-bindgen-futures",
"wasm-streams", "wasm-streams",
"web-sys", "web-sys",
"webpki-roots", "webpki-roots 0.25.3",
"winreg", "winreg",
] ]
@@ -4274,15 +4272,30 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.7" version = "0.16.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
dependencies = [
"cc",
"libc",
"once_cell",
"spin 0.5.2",
"untrusted 0.7.1",
"web-sys",
"winapi",
]
[[package]]
name = "ring"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e"
dependencies = [ dependencies = [
"cc", "cc",
"getrandom", "getrandom",
"libc", "libc",
"spin", "spin 0.9.8",
"untrusted", "untrusted 0.9.0",
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
@@ -4360,12 +4373,24 @@ dependencies = [
[[package]] [[package]]
name = "rustls" name = "rustls"
version = "0.21.10" version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99"
dependencies = [ dependencies = [
"log", "log",
"ring", "ring 0.16.20",
"sct",
"webpki",
]
[[package]]
name = "rustls"
version = "0.21.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb"
dependencies = [
"log",
"ring 0.16.20",
"rustls-webpki", "rustls-webpki",
"sct", "sct",
] ]
@@ -4385,8 +4410,8 @@ version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [ dependencies = [
"ring", "ring 0.17.3",
"untrusted", "untrusted 0.9.0",
] ]
[[package]] [[package]]
@@ -4428,12 +4453,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "sct" name = "sct"
version = "0.7.1" version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
dependencies = [ dependencies = [
"ring", "ring 0.16.20",
"untrusted", "untrusted 0.7.1",
] ]
[[package]] [[package]]
@@ -4696,6 +4721,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "spin"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]] [[package]]
name = "spin" name = "spin"
version = "0.9.8" version = "0.9.8"
@@ -5049,13 +5080,24 @@ dependencies = [
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "tokio-rustls"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
dependencies = [
"rustls 0.20.9",
"tokio",
"webpki",
]
[[package]] [[package]]
name = "tokio-rustls" name = "tokio-rustls"
version = "0.24.1" version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [ dependencies = [
"rustls", "rustls 0.21.6",
"tokio", "tokio",
] ]
@@ -5324,6 +5366,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]] [[package]]
name = "untrusted" name = "untrusted"
version = "0.9.0" version = "0.9.0"
@@ -5340,13 +5388,13 @@ dependencies = [
"flate2", "flate2",
"log", "log",
"once_cell", "once_cell",
"rustls", "rustls 0.21.6",
"rustls-webpki", "rustls-webpki",
"serde", "serde",
"serde_json", "serde_json",
"socks", "socks",
"url", "url",
"webpki-roots", "webpki-roots 0.25.3",
] ]
[[package]] [[package]]
@@ -5582,6 +5630,25 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "webpki"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
dependencies = [
"ring 0.16.20",
"untrusted 0.7.1",
]
[[package]]
name = "webpki-roots"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
dependencies = [
"webpki",
]
[[package]] [[package]]
name = "webpki-roots" name = "webpki-roots"
version = "0.25.3" version = "0.25.3"
@@ -5876,7 +5943,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.8.0" version = "1.7.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",

View File

@@ -21,7 +21,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.8.0" version = "1.7.1"
authors = [ authors = [
"Quentin de Quelen <quentin@dequelen.me>", "Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>", "Clément Renault <clement@meilisearch.com>",

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.5.1", default-features = false } actix-web = { version = "4.4.1", default-features = false }
anyhow = "1.0.79" anyhow = "1.0.79"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.3.0"

View File

@@ -14,18 +14,18 @@ default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.7.0"
actix-http = { version = "3.6.0", default-features = false, features = [ actix-http = { version = "3.5.1", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls-0_21", "rustls",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.5.1", default-features = false, features = [ actix-web = { version = "4.4.1", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"cookies", "cookies",
"rustls-0_21", "rustls",
] } ] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] } anyhow = { version = "1.0.79", features = ["backtrace"] }
@@ -52,7 +52,7 @@ index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] } indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10" is-terminal = "0.4.10"
itertools = "0.11.0" itertools = "0.11.0"
jsonwebtoken = "9.2.0" jsonwebtoken = "8.3.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
@@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = "0.21.6" rustls = "0.20.8"
rustls-pemfile = "1.0.2" rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true } segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }

View File

@@ -151,7 +151,7 @@ async fn run_http(
.keep_alive(KeepAlive::Os); .keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? { if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?; http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
} else { } else {
http_server.bind(&opt_clone.http_addr)?.run().await?; http_server.bind(&opt_clone.http_addr)?.run().await?;
} }

View File

@@ -564,11 +564,11 @@ impl Opt {
} }
if self.ssl_require_auth { if self.ssl_require_auth {
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots); let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(Arc::from(verifier)) config.with_client_cert_verifier(verifier)
} else { } else {
let verifier = let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots); AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(Arc::from(verifier)) config.with_client_cert_verifier(verifier)
} }
} }
None => config.with_no_client_auth(), None => config.with_no_client_auth(),

View File

@@ -604,7 +604,6 @@ fn embedder_analytics(
EmbedderSource::OpenAi => sources.insert("openAi"), EmbedderSource::OpenAi => sources.insert("openAi"),
EmbedderSource::HuggingFace => sources.insert("huggingFace"), EmbedderSource::HuggingFace => sources.insert("huggingFace"),
EmbedderSource::UserProvided => sources.insert("userProvided"), EmbedderSource::UserProvided => sources.insert("userProvided"),
EmbedderSource::Ollama => sources.insert("ollama"),
}; };
} }
}; };

View File

@@ -530,7 +530,7 @@ pub fn perform_search(
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
// but these attributes must be also be present // but these attributes must be also be present
// - in the fields_ids_map // - in the fields_ids_map
// - in the displayed attributes // - in the the displayed attributes
let to_retrieve_ids: BTreeSet<_> = query let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve .attributes_to_retrieve
.as_ref() .as_ref()
@@ -671,16 +671,27 @@ pub fn perform_search(
let sort_facet_values_by = let sort_facet_values_by =
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
let default_sort_facet_values_by =
sort_facet_values_by.get("*").copied().unwrap_or_default();
if fields.iter().all(|f| f != "*") { if fields.iter().all(|f| f != "*") {
let fields: Vec<_> = let fields: Vec<_> = fields
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); .iter()
.map(|n| {
(
n,
sort_facet_values_by
.get(n)
.copied()
.unwrap_or(default_sort_facet_values_by),
)
})
.collect();
facet_distribution.facets(fields); facet_distribution.facets(fields);
} }
let distribution = facet_distribution let distribution = facet_distribution
.candidates(candidates) .candidates(candidates)
.default_order_by(sort_facet_values_by.get("*")) .default_order_by(default_sort_facet_values_by)
.execute()?; .execute()?;
let stats = facet_distribution.compute_stats()?; let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats)) (Some(distribution), Some(stats))

View File

@@ -1237,8 +1237,8 @@ async fn error_add_documents_missing_document_id() {
} }
#[actix_rt::test] #[actix_rt::test]
#[should_panic] #[ignore] // // TODO: Fix in an other PR: this does not provoke any error.
async fn error_document_field_limit_reached_in_one_document() { async fn error_document_field_limit_reached() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
@@ -1246,241 +1246,22 @@ async fn error_document_field_limit_reached_in_one_document() {
let mut big_object = std::collections::HashMap::new(); let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "wow"); big_object.insert("id".to_owned(), "wow");
for i in 0..(u16::MAX as usize + 1) { for i in 0..65535 {
let key = i.to_string(); let key = i.to_string();
big_object.insert(key, "I am a text!"); big_object.insert(key, "I am a text!");
} }
let documents = json!([big_object]); let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await; let (_response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"500 Internal Server Error"); snapshot!(code, @"202");
let response = index.wait_task(response.uid()).await; index.wait_task(0).await;
snapshot!(code, @"202 Accepted"); let (response, code) = index.get_task(0).await;
snapshot!(code, @"200");
// Documents without a primary key are not accepted. // Documents without a primary key are not accepted.
snapshot!(response, snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###" @"");
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Checks the field limit when the fields are spread over two documents.
///
/// A first document (`id` = "wow") carrying `u16::MAX / 2` generated fields is
/// expected to be indexed successfully. A second document (`id` = "waw") whose
/// generated keys cover `u16::MAX / 2 ..= u16::MAX` is then expected to fail
/// with the `max_fields_limit_exceeded` error recorded in the second snapshot.
#[actix_rt::test]
async fn error_document_field_limit_reached_over_multiple_documents() {
let server = Server::new().await;
let index = server.index("test");
// Create the index up front with `id` as primary key.
// Presumably this is task 0, which is why the snapshots below show uids 1 and 2 - TODO confirm.
index.create(Some("id")).await;
// First document: `id` plus keys "0", "1", ... for every value in `0..(u16::MAX / 2)`.
let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "wow");
for i in 0..(u16::MAX / 2) {
let key = i.to_string();
big_object.insert(key, "I am a text!");
}
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// NOTE(review): `code` is still the status returned by `update_documents`;
// `wait_task` only rebinds `response`, so this repeats the assertion above.
snapshot!(code, @"202 Accepted");
snapshot!(response,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second document: a different `id` and keys continuing from `u16::MAX / 2`
// up to and including `u16::MAX` (computed in `usize` so that `+ 1` cannot overflow).
let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "waw");
for i in (u16::MAX as usize / 2)..(u16::MAX as usize + 1) {
let key = i.to_string();
big_object.insert(key, "I am a text!");
}
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// NOTE(review): same as above - this re-checks the enqueue status, not the task outcome.
snapshot!(code, @"202 Accepted");
// The task itself is expected to fail with `max_fields_limit_exceeded`.
snapshot!(response,
@r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "A document cannot contain more than 65,535 fields.",
"code": "max_fields_limit_exceeded",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#max_fields_limit_exceeded"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Sends a single document to an index whose primary key is `id` and snapshots
/// the resulting task, which is expected to succeed.
///
/// NOTE(review): despite the name, the `nested` map built below is never
/// attached to the document, so the payload only contains the `id` field.
/// Confirm whether the nested object was meant to be part of `big_object`.
#[actix_rt::test]
async fn error_document_field_limit_reached_in_one_nested_document() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// Build `u16::MAX + 1` entries keyed "0", "1", ... (range computed in `usize`).
// NOTE(review): `nested` is not used after this loop.
let mut nested = std::collections::HashMap::new();
for i in 0..(u16::MAX as usize + 1) {
let key = i.to_string();
nested.insert(key, "I am a text!");
}
// The document actually sent: only `id` = "wow".
let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "wow");
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// NOTE(review): `code` still holds the status returned by `update_documents`;
// this repeats the assertion above.
snapshot!(code, @"202 Accepted");
// NOTE(review): a previous comment here said documents without a primary key
// are rejected, but this document has `id` and the snapshot expects success.
snapshot!(response,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
// Sends the same single document twice through `update_documents` and snapshots
// both finished tasks (uids 1 and 2).
//
// NOTE(review): in both rounds `nested` is filled with `u16::MAX / 2` keys but is
// never inserted into `big_object`, so each payload only carries the `id` field.
// Both snapshots therefore show `succeeded`; confirm whether `nested` was meant
// to be attached to the document.
#[actix_rt::test]
async fn error_document_field_limit_reached_over_multiple_documents_with_nested_fields() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// First round.
let mut nested = std::collections::HashMap::new();
for i in 0..(u16::MAX / 2) {
let key = i.to_string();
nested.insert(key, "I am a text!");
}
let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "wow");
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// `code` is still the status of the enqueue request; this repeats the check above.
snapshot!(code, @"202 Accepted");
snapshot!(response,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second round: the key range is identical to the first one (0..u16::MAX / 2),
// and the document sent is again only `{ "id": "wow" }`.
let mut nested = std::collections::HashMap::new();
for i in 0..(u16::MAX / 2) {
let key = i.to_string();
nested.insert(key, "I am a text!");
}
let mut big_object = std::collections::HashMap::new();
big_object.insert("id".to_owned(), "wow");
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
// Same remark as above: `code` comes from the enqueue request.
snapshot!(code, @"202 Accepted");
snapshot!(response,
@r###"
{
"uid": 2,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
} }
#[actix_rt::test] #[actix_rt::test]

View File

@@ -123,28 +123,6 @@ async fn simple_facet_search_with_max_values() {
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
} }
/// With `maxValuesPerFacet` set to 1 and facet values sorted by count,
/// a facet search on `genres` must return exactly one hit.
#[actix_rt::test]
async fn simple_facet_search_by_count_with_max_values() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index
        .update_settings_faceting(
            json!({ "maxValuesPerFacet": 1, "sortFacetValuesBy": { "*": "count" } }),
        )
        .await;
    index.update_settings_filterable_attributes(json!(["genres"])).await;
    index.add_documents(documents, None).await;
    // Three updates were sent above; presumably the document addition is task uid 2.
    index.wait_task(2).await;

    let (response, code) =
        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

    // The response body is already printed by the assertion message on failure,
    // so no `dbg!` is needed around it.
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test] #[actix_rt::test]
async fn non_filterable_facet_search_error() { async fn non_filterable_facet_search_error() {
let server = Server::new().await; let server = Server::new().await;
@@ -179,24 +157,3 @@ async fn facet_search_dont_support_words() {
assert_eq!(code, 200, "{}", response); assert_eq!(code, 200, "{}", response);
assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); assert_eq!(response["facetHits"].as_array().unwrap().len(), 0);
} }
/// With facet values sorted by count, a facet search on `genres` for `"a"`
/// must list the hits from the highest document count to the lowest.
#[actix_rt::test]
async fn simple_facet_search_with_sort_by_count() {
    let server = Server::new().await;
    let index = server.index("test");

    let faceting = json!({ "sortFacetValuesBy": { "*": "count" } });
    index.update_settings_faceting(faceting).await;
    let filterable = json!(["genres"]);
    index.update_settings_filterable_attributes(filterable).await;
    index.add_documents(DOCUMENTS.clone(), None).await;
    // Three updates were sent above; presumably the document addition is task uid 2.
    index.wait_task(2).await;

    let facet_query = json!({"facetName": "genres", "facetQuery": "a"});
    let (response, code) = index.facet_search(facet_query).await;
    assert_eq!(code, 200, "{}", response);

    let facet_hits = response["facetHits"].as_array().unwrap();
    assert_eq!(facet_hits.len(), 2);
    assert_eq!(facet_hits[0], json!({ "value": "Action", "count": 3 }));
    assert_eq!(facet_hits[1], json!({ "value": "Adventure", "count": 2 }));
}

View File

@@ -20,13 +20,13 @@ use crate::heed_codec::facet::{
use crate::heed_codec::{ use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
}; };
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig; use crate::vector::EmbeddingConfig;
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
BEU32, BEU64,
}; };
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -1373,19 +1373,21 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET) self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
} }
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> { pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
let orders = self let mut orders = self
.main .main
.remap_types::<Str, SerdeJson<OrderByMap>>() .remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
.get(txn, main_key::SORT_FACET_VALUES_BY)? .get(txn, main_key::SORT_FACET_VALUES_BY)?
.unwrap_or_default(); .unwrap_or_default();
// Insert the default ordering if it is not already overwritten by the user.
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
Ok(orders) Ok(orders)
} }
pub(crate) fn put_sort_facet_values_by( pub(crate) fn put_sort_facet_values_by(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
val: &OrderByMap, val: &HashMap<String, OrderBy>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
} }

View File

@@ -16,7 +16,6 @@ pub mod facet;
mod fields_ids_map; mod fields_ids_map;
pub mod heed_codec; pub mod heed_codec;
pub mod index; pub mod index;
pub mod order_by_map;
pub mod prompt; pub mod prompt;
pub mod proximity; pub mod proximity;
pub mod score_details; pub mod score_details;
@@ -57,10 +56,10 @@ pub use self::heed_codec::{
UncheckedU8StrStrCodec, UncheckedU8StrStrCodec,
}; };
pub use self::index::Index; pub use self::index::Index;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::{ pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy, FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
}; };
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,57 +0,0 @@
use std::collections::{hash_map, HashMap};
use std::iter::FromIterator;
use serde::{Deserialize, Deserializer, Serialize};
use crate::OrderBy;
/// Maps a facet name to the [`OrderBy`] used to sort its values.
///
/// The `"*"` key acts as a wildcard: it holds the ordering returned for any
/// facet that has no entry of its own.
#[derive(Serialize)]
pub struct OrderByMap(HashMap<String, OrderBy>);
impl OrderByMap {
pub fn get(&self, key: impl AsRef<str>) -> OrderBy {
self.0
.get(key.as_ref())
.copied()
.unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default())
}
pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> {
self.0.insert(key, value)
}
}
impl Default for OrderByMap {
fn default() -> Self {
let mut map = HashMap::new();
map.insert("*".to_string(), OrderBy::Lexicographic);
OrderByMap(map)
}
}
impl FromIterator<(String, OrderBy)> for OrderByMap {
fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self {
OrderByMap(iter.into_iter().collect())
}
}
impl IntoIterator for OrderByMap {
type Item = (String, OrderBy);
type IntoIter = hash_map::IntoIter<String, OrderBy>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'de> Deserialize<'de> for OrderByMap {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?;
// Insert the default ordering if it is not already overwritten by the user.
map.0.entry("*".to_string()).or_insert(OrderBy::default());
Ok(map)
}
}

View File

@@ -168,7 +168,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
} }
// should we stop? // should we stop?
// We should if the search range doesn't include any // We should if the the search range doesn't include any
// element from the previous key or its successors // element from the previous key or its successors
let should_stop = { let should_stop = {
match self.right { match self.right {
@@ -232,7 +232,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
} }
// should we stop? // should we stop?
// We should if the search range doesn't include any // We should if the the search range doesn't include any
// element from the previous key or its successors // element from the previous key or its successors
let should_stop = { let should_stop = {
match self.right { match self.right {

View File

@@ -6,18 +6,15 @@ use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter}; pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result}; use crate::{Index, Result};
mod facet_distribution; mod facet_distribution;
mod facet_distribution_iter; mod facet_distribution_iter;
mod facet_range_search; mod facet_range_search;
mod facet_sort_ascending; mod facet_sort_ascending;
mod facet_sort_descending; mod facet_sort_descending;
mod filter; mod filter;
mod search;
fn facet_extreme_value<'t>( fn facet_extreme_value<'t>(
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't, mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,

View File

@@ -1,326 +0,0 @@
use std::cmp::{Ordering, Reverse};
use std::collections::BinaryHeap;
use std::ops::ControlFlow;
use charabia::normalizer::NormalizerOption;
use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
/// The maximum number of values per facet returned by the facet search route.
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
/// A request for the values of one facet, counted over the documents that an
/// underlying [`Search`] selects. Configure it with `query` / `max_values`,
/// then call `execute`.
pub struct SearchForFacetValues<'a> {
/// Text the facet values are matched against; when `None`, values are listed
/// without any text matching.
query: Option<String>,
/// Name of the facet whose values are looked up.
facet: String,
/// Search used to compute the candidate documents the counts are taken over.
search_query: Search<'a>,
/// Limit handed to the result collection.
max_values: usize,
/// Passed to `Search::execute_for_candidates`, OR-ed with "the search has a vector".
is_hybrid: bool,
}
impl<'a> SearchForFacetValues<'a> {
    /// Creates a request for the values of `facet`, counted over the candidates
    /// of `search_query`. No text query is set and the limit starts at
    /// `DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET`.
    pub fn new(
        facet: String,
        search_query: Search<'a>,
        is_hybrid: bool,
    ) -> SearchForFacetValues<'a> {
        SearchForFacetValues {
            query: None,
            facet,
            search_query,
            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
            is_hybrid,
        }
    }

    /// Sets the text the facet values must match.
    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
        self.query = Some(query.into());
        self
    }

    /// Sets the maximum number of hits to collect.
    pub fn max_values(&mut self, max: usize) -> &mut Self {
        self.max_values = max;
        self
    }

    /// Reads the `field_id_docid_facet_strings` entry stored under
    /// `(field_id, any_docid, facet_str)` and returns an owned copy of it,
    /// or `None` when there is no such entry.
    fn one_original_value_of(
        &self,
        field_id: FieldId,
        facet_str: &str,
        any_docid: DocumentId,
    ) -> Result<Option<String>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;
        let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
        Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
    }

    /// Runs the facet search and returns the collected hits.
    ///
    /// The hits are gathered in a `ValuesCollection` chosen from the index's
    /// `sort_facet_values_by` setting for this facet.
    ///
    /// # Errors
    ///
    /// Returns `UserError::InvalidFacetSearchFacetName` when the facet is not
    /// one of the index's filterable fields, and forwards any error raised by
    /// the index reads or by the candidate search.
    pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;

        let filterable_fields = index.filterable_fields(rtxn)?;
        if !filterable_fields.contains(&self.facet) {
            let (valid_fields, hidden_fields) =
                index.remove_hidden_fields(rtxn, filterable_fields)?;

            return Err(UserError::InvalidFacetSearchFacetName {
                field: self.facet.clone(),
                valid_fields,
                hidden_fields,
            }
            .into());
        }

        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let fid = match fields_ids_map.id(&self.facet) {
            Some(fid) => fid,
            // we return an empty list of results when the attribute has been
            // set as filterable but no document contains this field (yet).
            None => return Ok(Vec::new()),
        };

        let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
            Some(fst) => fst,
            None => return Ok(Vec::new()),
        };

        let search_candidates = self
            .search_query
            .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?;

        let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) {
            OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values),
            OrderBy::Count => ValuesCollection::by_count(self.max_values),
        };

        match self.query.as_ref() {
            Some(query) => {
                let options = NormalizerOption { lossy: true, ..Default::default() };
                let query = query.normalize(&options);
                let query = query.as_ref();

                let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
                let field_authorizes_typos =
                    !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);

                if authorize_typos && field_authorizes_typos {
                    let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
                    if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
                        // The query is registered as an exact word: only the facet
                        // value equal to the normalized query is looked up.
                        if fst.contains(query) {
                            self.fetch_original_facets_using_normalized(
                                fid,
                                query,
                                query,
                                &search_candidates,
                                &mut results,
                            )?;
                        }
                    } else {
                        let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
                        let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;

                        // The number of typos passed to `build_dfa` grows with the
                        // query length, following the index's two thresholds.
                        let is_prefix = true;
                        let automaton = if query.len() < one_typo as usize {
                            build_dfa(query, 0, is_prefix)
                        } else if query.len() < two_typos as usize {
                            build_dfa(query, 1, is_prefix)
                        } else {
                            build_dfa(query, 2, is_prefix)
                        };

                        let mut stream = fst.search(automaton).into_stream();
                        while let Some(facet_value) = stream.next() {
                            let value = std::str::from_utf8(facet_value)?;
                            if self
                                .fetch_original_facets_using_normalized(
                                    fid,
                                    value,
                                    query,
                                    &search_candidates,
                                    &mut results,
                                )?
                                .is_break()
                            {
                                break;
                            }
                        }
                    }
                } else {
                    // Typos are not allowed here: plain prefix matching on the FST.
                    let automaton = Str::new(query).starts_with();
                    let mut stream = fst.search(automaton).into_stream();
                    while let Some(facet_value) = stream.next() {
                        let value = std::str::from_utf8(facet_value)?;
                        if self
                            .fetch_original_facets_using_normalized(
                                fid,
                                value,
                                query,
                                &search_candidates,
                                &mut results,
                            )?
                            .is_break()
                        {
                            break;
                        }
                    }
                }
            }
            None => {
                // No text query: walk every level-0 entry of this facet.
                let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
                for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
                    let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
                        result?;
                    let count = search_candidates.intersection_len(&bitmap);
                    if count != 0 {
                        // `bitmap` intersects the candidates, so it cannot be empty
                        // and `min()` always returns a document id.
                        let value = self
                            .one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
                            .unwrap_or_else(|| left_bound.to_string());
                        if results.insert(FacetValueHit { value, count }).is_break() {
                            break;
                        }
                    }
                }
            }
        }

        Ok(results.into_sorted_vec())
    }

    /// Resolves the normalized facet `value` to its original strings and inserts
    /// into `results` each one that is present in at least one candidate document.
    ///
    /// Returns `ControlFlow::Break(())` as soon as `results` refuses further
    /// values, so that callers can stop walking the FST. A missing database
    /// entry is logged and reported as `Continue`.
    fn fetch_original_facets_using_normalized(
        &self,
        fid: FieldId,
        value: &str,
        query: &str,
        search_candidates: &RoaringBitmap,
        results: &mut ValuesCollection,
    ) -> Result<ControlFlow<()>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;

        let database = index.facet_id_normalized_string_strings;
        let key = (fid, value);
        let original_strings = match database.get(rtxn, &key)? {
            Some(original_strings) => original_strings,
            None => {
                error!("the facet value is missing from the facet database: {key:?}");
                return Ok(ControlFlow::Continue(()));
            }
        };
        for original in original_strings {
            let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
            let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
                Some(FacetGroupValue { bitmap, .. }) => bitmap,
                None => {
                    error!("the facet value is missing from the facet database: {key:?}");
                    return Ok(ControlFlow::Continue(()));
                }
            };
            let count = search_candidates.intersection_len(&docids);
            if count != 0 {
                // `docids` intersects the candidates, so `min()` cannot be `None`.
                let value = self
                    .one_original_value_of(fid, &original, docids.min().unwrap())?
                    .unwrap_or_else(|| query.to_string());
                if results.insert(FacetValueHit { value, count }).is_break() {
                    // Once the collection breaks it never accepts another value,
                    // so tell the caller to stop instead of only leaving this loop.
                    return Ok(ControlFlow::Break(()));
                }
            }
        }

        Ok(ControlFlow::Continue(()))
    }
}
/// One hit of a facet search: a facet value and how many documents carry it.
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
impl PartialOrd for FacetValueHit {
    /// Delegates to [`Ord::cmp`], so the partial order always agrees with the total one.
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
impl Ord for FacetValueHit {
    /// Orders hits by `count` first and uses `value` only to break ties.
    fn cmp(&self, rhs: &Self) -> Ordering {
        match self.count.cmp(&rhs.count) {
            Ordering::Equal => self.value.cmp(&rhs.value),
            unequal => unequal,
        }
    }
}
// Marker impl required by `Ord`; both fields (`String`, `u64`) have a total equality.
impl Eq for FacetValueHit {}
/// A wrapper type that collects the best facet values, either the first ones
/// received or the ones with the highest number of associated documents,
/// never holding more than `max` of them.
enum ValuesCollection {
/// Keeps the first `max` values in the order they are inserted
/// (callers presumably feed them in lexicographic order).
Lexicographic { max: usize, content: Vec<FacetValueHit> },
/// Keeps the top values according to the number of values associated to them.
///
/// Note that it is a max heap and we need to move the smallest counts
/// at the top to be able to pop them when we reach the max_values limit,
/// hence the `Reverse` wrapper around each hit.
Count { max: usize, content: BinaryHeap<Reverse<FacetValueHit>> },
}
impl ValuesCollection {
    /// Creates a collection that keeps the first `max` inserted values, in insertion order.
    pub fn by_lexicographic(max: usize) -> Self {
        ValuesCollection::Lexicographic { max, content: Vec::new() }
    }

    /// Creates a collection that keeps the `max` values with the highest counts.
    pub fn by_count(max: usize) -> Self {
        ValuesCollection::Count { max, content: BinaryHeap::new() }
    }

    /// Offers `value` to the collection.
    ///
    /// Returns `ControlFlow::Break(())` when no later value can change the
    /// content any more: the lexicographic variant is full, or the count
    /// variant was created with `max == 0`. Otherwise returns `Continue`.
    pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> {
        match self {
            ValuesCollection::Lexicographic { max, content } => {
                if content.len() < *max {
                    content.push(value);
                    // Still room left after this push: keep feeding values.
                    if content.len() < *max {
                        return ControlFlow::Continue(());
                    }
                }
                ControlFlow::Break(())
            }
            ValuesCollection::Count { max, content } => {
                if content.len() == *max {
                    // Peeking gives us the worst value in the list as
                    // this is a max-heap and we reversed it.
                    // An empty heap here means `max` is zero: nothing can ever be kept.
                    let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) };
                    if peek.0.count <= value.count {
                        // Replace the current worst value in the heap
                        // with the new one we received that is better.
                        // Note that a value with an equal count also replaces it.
                        *peek = Reverse(value);
                    }
                } else {
                    content.push(Reverse(value));
                }
                ControlFlow::Continue(())
            }
        }
    }

    /// Returns the collected facet values.
    ///
    /// The lexicographic variant returns them in insertion order; the count
    /// variant returns them by descending count (ties by descending value).
    pub fn into_sorted_vec(self) -> Vec<FacetValueHit> {
        match self {
            // Already a `Vec` in the right order: hand it back as is.
            ValuesCollection::Lexicographic { content, .. } => content,
            ValuesCollection::Count { content, .. } => {
                // Convert the heap into a vec of hits by removing the Reverse wrapper.
                // Sorting the reversed hits in ascending order yields the hits
                // themselves in descending order.
                content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect()
            }
        }
    }
}

View File

@@ -1,17 +1,25 @@
use std::fmt; use std::fmt;
use std::ops::ControlFlow;
use charabia::normalizer::NormalizerOption;
use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap; use roaring::bitmap::RoaringBitmap;
use tracing::error;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult}; use self::new::{execute_vector_search, PartialSearchResult};
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::DistributionShift; use crate::vector::DistributionShift;
use crate::{ use crate::{
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
SearchContext, Result, SearchContext,
}; };
// Building these factories is not free. // Building these factories is not free.
@@ -19,6 +27,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true)); static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of values per facet returned by the facet search route.
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
pub mod facet; pub mod facet;
mod fst_utils; mod fst_utils;
pub mod hybrid; pub mod hybrid;
@@ -291,6 +302,240 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
} }
} }
/// A request for the values of one facet, counted over the documents that an
/// underlying [`Search`] selects. Configure it with `query` / `max_values`,
/// then call `execute`.
pub struct SearchForFacetValues<'a> {
/// Text the facet values are matched against; when `None`, values are listed
/// without any text matching.
query: Option<String>,
/// Name of the facet whose values are looked up.
facet: String,
/// Search used to compute the candidate documents the counts are taken over.
search_query: Search<'a>,
/// Number of hits after which collection stops.
max_values: usize,
/// Passed to `Search::execute_for_candidates`, OR-ed with "the search has a vector".
is_hybrid: bool,
}
impl<'a> SearchForFacetValues<'a> {
    /// Creates a request for the values of `facet`, counted over the candidates
    /// of `search_query`. No text query is set and the limit starts at
    /// `DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET`.
    pub fn new(
        facet: String,
        search_query: Search<'a>,
        is_hybrid: bool,
    ) -> SearchForFacetValues<'a> {
        SearchForFacetValues {
            query: None,
            facet,
            search_query,
            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
            is_hybrid,
        }
    }

    /// Sets the text the facet values must match.
    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
        self.query = Some(query.into());
        self
    }

    /// Sets the maximum number of hits to return.
    pub fn max_values(&mut self, max: usize) -> &mut Self {
        self.max_values = max;
        self
    }

    /// Reads the `field_id_docid_facet_strings` entry stored under
    /// `(field_id, any_docid, facet_str)` and returns an owned copy of it,
    /// or `None` when there is no such entry.
    fn one_original_value_of(
        &self,
        field_id: FieldId,
        facet_str: &str,
        any_docid: DocumentId,
    ) -> Result<Option<String>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;
        let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
        Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
    }

    /// Runs the facet search and returns at most `max_values` hits, in the
    /// order they are encountered.
    ///
    /// # Errors
    ///
    /// Returns `UserError::InvalidFacetSearchFacetName` when the facet is not
    /// one of the index's filterable fields, and forwards any error raised by
    /// the index reads or by the candidate search.
    pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;

        let filterable_fields = index.filterable_fields(rtxn)?;
        if !filterable_fields.contains(&self.facet) {
            let (valid_fields, hidden_fields) =
                index.remove_hidden_fields(rtxn, filterable_fields)?;

            return Err(UserError::InvalidFacetSearchFacetName {
                field: self.facet.clone(),
                valid_fields,
                hidden_fields,
            }
            .into());
        }

        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let fid = match fields_ids_map.id(&self.facet) {
            Some(fid) => fid,
            // we return an empty list of results when the attribute has been
            // set as filterable but no document contains this field (yet).
            None => return Ok(Vec::new()),
        };

        let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
            Some(fst) => fst,
            None => return Ok(Vec::new()),
        };

        let search_candidates = self
            .search_query
            .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?;

        // The loops below push a hit before comparing against the limit, so a
        // zero limit has to be handled up front to really return nothing.
        if self.max_values == 0 {
            return Ok(Vec::new());
        }

        let mut results = Vec::new();
        match self.query.as_ref() {
            Some(query) => {
                let options = NormalizerOption { lossy: true, ..Default::default() };
                let query = query.normalize(&options);
                let query = query.as_ref();

                let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
                let field_authorizes_typos =
                    !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);

                if authorize_typos && field_authorizes_typos {
                    let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
                    if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
                        // The query is registered as an exact word: only the facet
                        // value equal to the normalized query is looked up.
                        if fst.contains(query) {
                            self.fetch_original_facets_using_normalized(
                                fid,
                                query,
                                query,
                                &search_candidates,
                                &mut results,
                            )?;
                        }
                    } else {
                        let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
                        let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;

                        // The number of typos passed to `build_dfa` grows with the
                        // query length, following the index's two thresholds.
                        let is_prefix = true;
                        let automaton = if query.len() < one_typo as usize {
                            build_dfa(query, 0, is_prefix)
                        } else if query.len() < two_typos as usize {
                            build_dfa(query, 1, is_prefix)
                        } else {
                            build_dfa(query, 2, is_prefix)
                        };

                        let mut stream = fst.search(automaton).into_stream();
                        while let Some(facet_value) = stream.next() {
                            let value = std::str::from_utf8(facet_value)?;
                            if self
                                .fetch_original_facets_using_normalized(
                                    fid,
                                    value,
                                    query,
                                    &search_candidates,
                                    &mut results,
                                )?
                                .is_break()
                            {
                                break;
                            }
                        }
                    }
                } else {
                    // Typos are not allowed here: plain prefix matching on the FST.
                    let automaton = Str::new(query).starts_with();
                    let mut stream = fst.search(automaton).into_stream();
                    while let Some(facet_value) = stream.next() {
                        let value = std::str::from_utf8(facet_value)?;
                        if self
                            .fetch_original_facets_using_normalized(
                                fid,
                                value,
                                query,
                                &search_candidates,
                                &mut results,
                            )?
                            .is_break()
                        {
                            break;
                        }
                    }
                }
            }
            None => {
                // No text query: walk every level-0 entry of this facet.
                let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
                for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
                    let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
                        result?;
                    let count = search_candidates.intersection_len(&bitmap);
                    if count != 0 {
                        // `bitmap` intersects the candidates, so it cannot be empty
                        // and `min()` always returns a document id.
                        let value = self
                            .one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
                            .unwrap_or_else(|| left_bound.to_string());
                        results.push(FacetValueHit { value, count });
                    }
                    if results.len() >= self.max_values {
                        break;
                    }
                }
            }
        }

        Ok(results)
    }

    /// Resolves the normalized facet `value` to its original strings and pushes
    /// onto `results` each one that is present in at least one candidate document.
    ///
    /// Returns `ControlFlow::Break(())` once `results` holds `max_values` hits.
    /// A missing database entry is logged and reported as `Continue`.
    fn fetch_original_facets_using_normalized(
        &self,
        fid: FieldId,
        value: &str,
        query: &str,
        search_candidates: &RoaringBitmap,
        results: &mut Vec<FacetValueHit>,
    ) -> Result<ControlFlow<()>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;

        let database = index.facet_id_normalized_string_strings;
        let key = (fid, value);
        let original_strings = match database.get(rtxn, &key)? {
            Some(original_strings) => original_strings,
            None => {
                error!("the facet value is missing from the facet database: {key:?}");
                return Ok(ControlFlow::Continue(()));
            }
        };
        for original in original_strings {
            let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
            let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
                Some(FacetGroupValue { bitmap, .. }) => bitmap,
                None => {
                    error!("the facet value is missing from the facet database: {key:?}");
                    return Ok(ControlFlow::Continue(()));
                }
            };
            let count = search_candidates.intersection_len(&docids);
            if count != 0 {
                // `docids` intersects the candidates, so `min()` cannot be `None`.
                let value = self
                    .one_original_value_of(fid, &original, docids.min().unwrap())?
                    .unwrap_or_else(|| query.to_string());
                results.push(FacetValueHit { value, count });
            }
            if results.len() >= self.max_values {
                return Ok(ControlFlow::Break(()));
            }
        }

        Ok(ControlFlow::Continue(()))
    }
}
/// One hit of a facet search: a facet value and how many documents carry it.
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
#[allow(unused_imports)] #[allow(unused_imports)]

View File

@@ -5,7 +5,7 @@ The typo ranking rule should transform the query graph such that it only contain
the combinations of word derivations that it used to compute its bucket. the combinations of word derivations that it used to compute its bucket.
The proximity ranking rule should then look for proximities only between those specific derivations. The proximity ranking rule should then look for proximities only between those specific derivations.
For example, given the search query `beautiful summer` and the dataset: For example, given the the search query `beautiful summer` and the dataset:
```text ```text
{ "id": 0, "text": "beautigul summer...... beautiful day in the summer" } { "id": 0, "text": "beautigul summer...... beautiful day in the summer" }
{ "id": 1, "text": "beautiful summer" } { "id": 1, "text": "beautiful summer" }

View File

@@ -14,13 +14,12 @@ use super::IndexerConfig;
use crate::criterion::Criterion; use crate::criterion::Criterion;
use crate::error::UserError; use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod; use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::update::{IndexDocuments, UpdateIndexingStep};
use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{FieldsIdsMap, Index, Result}; use crate::{FieldsIdsMap, Index, OrderBy, Result};
#[derive(Debug, Clone, PartialEq, Eq, Copy)] #[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> { pub enum Setting<T> {
@@ -146,7 +145,7 @@ pub struct Settings<'a, 't, 'i> {
/// Attributes on which typo tolerance is disabled. /// Attributes on which typo tolerance is disabled.
exact_attributes: Setting<HashSet<String>>, exact_attributes: Setting<HashSet<String>>,
max_values_per_facet: Setting<usize>, max_values_per_facet: Setting<usize>,
sort_facet_values_by: Setting<OrderByMap>, sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
pagination_max_total_hits: Setting<usize>, pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>, proximity_precision: Setting<ProximityPrecision>,
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
@@ -341,7 +340,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.max_values_per_facet = Setting::Reset; self.max_values_per_facet = Setting::Reset;
} }
pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) { pub fn set_sort_facet_values_by(&mut self, value: HashMap<String, OrderBy>) {
self.sort_facet_values_by = Setting::Set(value); self.sort_facet_values_by = Setting::Set(value);
} }
@@ -1187,13 +1186,6 @@ pub fn validate_embedding_settings(
} }
} }
} }
EmbedderSource::Ollama => {
// Dimensions get inferred, only model name is required
check_unset(&dimensions, "dimensions", inferred_source, name)?;
check_set(&model, "model", inferred_source, name)?;
check_unset(&api_key, "apiKey", inferred_source, name)?;
check_unset(&revision, "revision", inferred_source, name)?;
}
EmbedderSource::HuggingFace => { EmbedderSource::HuggingFace => {
check_unset(&api_key, "apiKey", inferred_source, name)?; check_unset(&api_key, "apiKey", inferred_source, name)?;
check_unset(&dimensions, "dimensions", inferred_source, name)?; check_unset(&dimensions, "dimensions", inferred_source, name)?;

View File

@@ -2,7 +2,6 @@ use std::path::PathBuf;
use hf_hub::api::sync::ApiError; use hf_hub::api::sync::ApiError;
use super::ollama::OllamaError;
use crate::error::FaultSource; use crate::error::FaultSource;
use crate::vector::openai::OpenAiError; use crate::vector::openai::OpenAiError;
@@ -72,17 +71,6 @@ pub enum EmbedErrorKind {
OpenAiRuntimeInit(std::io::Error), OpenAiRuntimeInit(std::io::Error),
#[error("initializing web client for sending embedding requests failed: {0}")] #[error("initializing web client for sending embedding requests failed: {0}")]
InitWebClient(reqwest::Error), InitWebClient(reqwest::Error),
// Dedicated Ollama error kinds, might have to merge them into one cohesive error type for all backends.
#[error("unexpected response from Ollama: {0}")]
OllamaUnexpected(reqwest::Error),
#[error("sent too many requests to Ollama: {0}")]
OllamaTooManyRequests(OllamaError),
#[error("received internal error from Ollama: {0}")]
OllamaInternalServerError(OllamaError),
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0}")]
OllamaModelNotFoundError(OllamaError),
#[error("received unhandled HTTP status code {0} from Ollama")]
OllamaUnhandledStatusCode(u16),
} }
impl EmbedError { impl EmbedError {
@@ -141,26 +129,6 @@ impl EmbedError {
pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self { pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self {
Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime } Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime }
} }
pub(crate) fn ollama_unexpected(inner: reqwest::Error) -> EmbedError {
Self { kind: EmbedErrorKind::OllamaUnexpected(inner), fault: FaultSource::Bug }
}
pub(crate) fn ollama_model_not_found(inner: OllamaError) -> EmbedError {
Self { kind: EmbedErrorKind::OllamaModelNotFoundError(inner), fault: FaultSource::User }
}
pub(crate) fn ollama_too_many_requests(inner: OllamaError) -> EmbedError {
Self { kind: EmbedErrorKind::OllamaTooManyRequests(inner), fault: FaultSource::Runtime }
}
pub(crate) fn ollama_internal_server_error(inner: OllamaError) -> EmbedError {
Self { kind: EmbedErrorKind::OllamaInternalServerError(inner), fault: FaultSource::Runtime }
}
pub(crate) fn ollama_unhandled_status_code(code: u16) -> EmbedError {
Self { kind: EmbedErrorKind::OllamaUnhandledStatusCode(code), fault: FaultSource::Bug }
}
} }
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
@@ -227,13 +195,6 @@ impl NewEmbedderError {
} }
} }
pub fn ollama_could_not_determine_dimension(inner: EmbedError) -> NewEmbedderError {
Self {
kind: NewEmbedderErrorKind::CouldNotDetermineDimension(inner),
fault: FaultSource::User,
}
}
pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self { pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self {
Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User } Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User }
} }

View File

@@ -10,8 +10,6 @@ pub mod manual;
pub mod openai; pub mod openai;
pub mod settings; pub mod settings;
pub mod ollama;
pub use self::error::Error; pub use self::error::Error;
pub type Embedding = Vec<f32>; pub type Embedding = Vec<f32>;
@@ -78,7 +76,6 @@ pub enum Embedder {
HuggingFace(hf::Embedder), HuggingFace(hf::Embedder),
OpenAi(openai::Embedder), OpenAi(openai::Embedder),
UserProvided(manual::Embedder), UserProvided(manual::Embedder),
Ollama(ollama::Embedder),
} }
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
@@ -130,7 +127,6 @@ impl IntoIterator for EmbeddingConfigs {
pub enum EmbedderOptions { pub enum EmbedderOptions {
HuggingFace(hf::EmbedderOptions), HuggingFace(hf::EmbedderOptions),
OpenAi(openai::EmbedderOptions), OpenAi(openai::EmbedderOptions),
Ollama(ollama::EmbedderOptions),
UserProvided(manual::EmbedderOptions), UserProvided(manual::EmbedderOptions),
} }
@@ -148,10 +144,6 @@ impl EmbedderOptions {
pub fn openai(api_key: Option<String>) -> Self { pub fn openai(api_key: Option<String>) -> Self {
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
} }
pub fn ollama() -> Self {
Self::Ollama(ollama::EmbedderOptions::with_default_model())
}
} }
impl Embedder { impl Embedder {
@@ -159,7 +151,6 @@ impl Embedder {
Ok(match options { Ok(match options {
EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?), EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
EmbedderOptions::UserProvided(options) => { EmbedderOptions::UserProvided(options) => {
Self::UserProvided(manual::Embedder::new(options)) Self::UserProvided(manual::Embedder::new(options))
} }
@@ -176,10 +167,6 @@ impl Embedder {
let client = embedder.new_client()?; let client = embedder.new_client()?;
embedder.embed(texts, &client).await embedder.embed(texts, &client).await
} }
Embedder::Ollama(embedder) => {
let client = embedder.new_client()?;
embedder.embed(texts, &client).await
}
Embedder::UserProvided(embedder) => embedder.embed(texts), Embedder::UserProvided(embedder) => embedder.embed(texts),
} }
} }
@@ -194,7 +181,6 @@ impl Embedder {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks), Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks),
Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks),
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks),
} }
} }
@@ -203,7 +189,6 @@ impl Embedder {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(), Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
Embedder::OpenAi(embedder) => embedder.chunk_count_hint(), Embedder::OpenAi(embedder) => embedder.chunk_count_hint(),
Embedder::Ollama(embedder) => embedder.chunk_count_hint(),
Embedder::UserProvided(_) => 1, Embedder::UserProvided(_) => 1,
} }
} }
@@ -212,7 +197,6 @@ impl Embedder {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(),
Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(),
Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(),
Embedder::UserProvided(_) => 1, Embedder::UserProvided(_) => 1,
} }
} }
@@ -221,7 +205,6 @@ impl Embedder {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.dimensions(), Embedder::HuggingFace(embedder) => embedder.dimensions(),
Embedder::OpenAi(embedder) => embedder.dimensions(), Embedder::OpenAi(embedder) => embedder.dimensions(),
Embedder::Ollama(embedder) => embedder.dimensions(),
Embedder::UserProvided(embedder) => embedder.dimensions(), Embedder::UserProvided(embedder) => embedder.dimensions(),
} }
} }
@@ -230,7 +213,6 @@ impl Embedder {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.distribution(), Embedder::HuggingFace(embedder) => embedder.distribution(),
Embedder::OpenAi(embedder) => embedder.distribution(), Embedder::OpenAi(embedder) => embedder.distribution(),
Embedder::Ollama(embedder) => embedder.distribution(),
Embedder::UserProvided(_embedder) => None, Embedder::UserProvided(_embedder) => None,
} }
} }

View File

@@ -1,307 +0,0 @@
// Copied from "openai.rs" with the sections I actually understand changed for Ollama.
// The common components of the Ollama and OpenAI interfaces might need to be extracted.
use std::fmt::Display;
use reqwest::StatusCode;
use super::error::{EmbedError, NewEmbedderError};
use super::openai::Retry;
use super::{DistributionShift, Embedding, Embeddings};
/// Embedder that requests embeddings from an Ollama server over HTTP.
#[derive(Debug)]
pub struct Embedder {
/// Default headers installed on every HTTP client built by `new_client`.
headers: reqwest::header::HeaderMap,
/// Options this embedder was created with; `new` fills in the model dimensions.
options: EmbedderOptions,
}
/// Configuration of an Ollama [`Embedder`].
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
/// Model whose name is sent with every embedding request.
pub embedding_model: EmbeddingModel,
}
/// An Ollama embedding model, identified by its name.
#[derive(
Debug, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, deserr::Deserr,
)]
#[deserr(deny_unknown_fields)]
pub struct EmbeddingModel {
/// Name of the model, sent as the `model` field of each request.
name: String,
/// Number of dimensions of the embeddings; `0` when built by `from_name` or `default`,
/// then overwritten by `Embedder::new` with the dimension of a probe embedding.
dimensions: usize,
}
/// JSON body posted to the Ollama embeddings endpoint.
#[derive(Debug, serde::Serialize)]
struct OllamaRequest<'a> {
/// Name of the model that should compute the embedding.
model: &'a str,
/// Text to embed; a single prompt per request.
prompt: &'a str,
}
/// JSON body deserialized from a successful response of the embeddings endpoint.
#[derive(Debug, serde::Deserialize)]
struct OllamaResponse {
/// Embedding computed for the prompt of the request.
embedding: Embedding,
}
/// Error payload deserialized from the body of a non-success Ollama response.
///
/// Its `Display` implementation prints the `error` message as is.
#[derive(Debug, serde::Deserialize)]
pub struct OllamaError {
/// Error message.
error: String,
}
impl EmbeddingModel {
    /// Maximum number of tokens of a prompt.
    ///
    /// The same value is returned for every model, which might not be accurate for all of them.
    pub fn max_token(&self) -> usize {
        const MAX_TOKEN: usize = 8192;
        MAX_TOKEN
    }

    /// Default number of dimensions, which is the one of `nomic-embed-text`.
    pub fn default_dimensions(&self) -> usize {
        const NOMIC_EMBED_TEXT_DIMENSIONS: usize = 768;
        NOMIC_EMBED_TEXT_DIMENSIONS
    }

    /// Returns an owned copy of the model name.
    pub fn name(&self) -> String {
        self.name.to_owned()
    }

    /// Creates a model from its name, with dimensions left at `0`.
    pub fn from_name(name: &str) -> Self {
        let name = String::from(name);
        Self { name, dimensions: 0 }
    }

    /// Always `false`: this reports that dimensions cannot be overridden for these models.
    pub fn supports_overriding_dimensions(&self) -> bool {
        false
    }
}
impl Default for EmbeddingModel {
    /// The default model is `nomic-embed-text`, with dimensions left at `0`.
    fn default() -> Self {
        // `from_name` sets `dimensions` to 0, exactly like the literal it replaces.
        Self::from_name("nomic-embed-text")
    }
}
impl EmbedderOptions {
pub fn with_default_model() -> Self {
Self { embedding_model: Default::default() }
}
pub fn with_embedding_model(embedding_model: EmbeddingModel) -> Self {
Self { embedding_model }
}
}
impl Embedder {
/// Builds a fresh HTTP client carrying a copy of this embedder's default headers.
///
/// # Errors
///
/// Returns the error built by `EmbedError::openai_initialize_web_client` when the client
/// cannot be built.
pub fn new_client(&self) -> Result<reqwest::Client, EmbedError> {
    let default_headers = self.headers.clone();
    let builder = reqwest::ClientBuilder::new().default_headers(default_headers);
    builder.build().map_err(EmbedError::openai_initialize_web_client)
}
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
let mut headers = reqwest::header::HeaderMap::new();
headers.insert(
reqwest::header::CONTENT_TYPE,
reqwest::header::HeaderValue::from_static("application/json"),
);
let mut embedder = Self { options, headers };
let rt = tokio::runtime::Builder::new_current_thread()
.enable_io()
.enable_time()
.build()
.map_err(EmbedError::openai_runtime_init)
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
// Get dimensions from Ollama
let request =
OllamaRequest { model: &embedder.options.embedding_model.name(), prompt: "test" };
// TODO: Refactor into shared error type
let client = embedder
.new_client()
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
rt.block_on(async move {
let response = client
.post(get_ollama_path())
.json(&request)
.send()
.await
.map_err(EmbedError::ollama_unexpected)
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
// Process error in case model not found
let response = Self::check_response(response).await.map_err(|_err| {
let e = EmbedError::ollama_model_not_found(OllamaError {
error: format!("model: {}", embedder.options.embedding_model.name()),
});
NewEmbedderError::ollama_could_not_determine_dimension(e)
})?;
let response: OllamaResponse = response
.json()
.await
.map_err(EmbedError::ollama_unexpected)
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
let embedding = Embeddings::from_single_embedding(response.embedding);
embedder.options.embedding_model.dimensions = embedding.dimension();
tracing::info!(
"ollama model {} with dimensionality {} added",
embedder.options.embedding_model.name(),
embedding.dimension()
);
Ok(embedder)
})
}
/// Returns `response` untouched when its status is a success, and otherwise converts it
/// into the [`Retry`] to apply.
///
/// - `429 Too Many Requests`: rate-limited retry.
/// - `503 Service Unavailable`: retry later.
/// - `404 Not Found`: give up with a "model not found" error.
/// - any other status: give up with an "unhandled status code" error.
///
/// For the first three statuses the body is decoded as an [`OllamaError`]; a decoding
/// failure is reported as an unexpected response, with the retry strategy of that status
/// (retry later for 429 and 503, give up for 404).
async fn check_response(response: reqwest::Response) -> Result<reqwest::Response, Retry> {
    let status = response.status();
    if status.is_success() {
        return Ok(response);
    }

    // Not the same number of possible error cases covered as with OpenAI.
    let retry = match status {
        StatusCode::TOO_MANY_REQUESTS => {
            let body: OllamaError = response
                .json()
                .await
                .map_err(EmbedError::ollama_unexpected)
                .map_err(Retry::retry_later)?;
            Retry::rate_limited(EmbedError::ollama_too_many_requests(body))
        }
        StatusCode::SERVICE_UNAVAILABLE => {
            let body: OllamaError = response
                .json()
                .await
                .map_err(EmbedError::ollama_unexpected)
                .map_err(Retry::retry_later)?;
            Retry::retry_later(EmbedError::ollama_internal_server_error(body))
        }
        StatusCode::NOT_FOUND => {
            let body: OllamaError = response
                .json()
                .await
                .map_err(EmbedError::ollama_unexpected)
                .map_err(Retry::give_up)?;
            Retry::give_up(EmbedError::ollama_model_not_found(body))
        }
        other => Retry::give_up(EmbedError::ollama_unhandled_status_code(other.as_u16())),
    };

    Err(retry)
}
/// Embeds every text of `texts`, one request per text, and returns the embeddings in the
/// same order.
///
/// Each text is attempted up to 7 times, sleeping between attempts for the duration
/// given by the failed attempt's [`Retry`], followed by one final attempt.
///
/// # Errors
///
/// Returns the error of an attempt as soon as its [`Retry`] says to give up, or the
/// error of the final attempt once all retries are exhausted. A text is never silently
/// skipped: on success the output has exactly one entry per input text.
pub async fn embed(
    &self,
    texts: Vec<String>,
    client: &reqwest::Client,
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
    // Ollama only embeds one document at a time.
    let mut results = Vec::with_capacity(texts.len());

    // The retry loop is inside the texts loop, might have to switch that around
    'texts: for text in texts {
        // Retries copied from openai.rs
        for attempt in 0..7 {
            let retry = match self.try_embed(&text, client).await {
                Ok(result) => {
                    results.push(result);
                    continue 'texts;
                }
                Err(retry) => retry,
            };

            tracing::warn!("Failed: {}", retry.error);
            let retry_duration = retry.into_duration(attempt)?;
            tracing::warn!(
                "Attempt #{}, retrying after {}ms.",
                attempt,
                retry_duration.as_millis()
            );
            tokio::time::sleep(retry_duration).await;
        }

        // All retries failed with a retryable error: make a last attempt and surface its
        // error, otherwise `results` would silently end up shorter than `texts`.
        let result = self.try_embed(&text, client).await.map_err(|retry| retry.error)?;
        results.push(result);
    }

    Ok(results)
}
/// Makes a single attempt at embedding `text`.
///
/// # Errors
///
/// Returns a [`Retry`] describing whether and how the caller should try again:
/// - sending the request failed: retry later,
/// - the server answered with a non-success status: see `check_response`,
/// - the response body could not be decoded: retry later, reported as an unexpected
///   response from Ollama.
async fn try_embed(
    &self,
    text: &str,
    client: &reqwest::Client,
) -> Result<Embeddings<f32>, Retry> {
    let request = OllamaRequest { model: &self.options.embedding_model.name(), prompt: text };
    let response = client
        .post(get_ollama_path())
        .json(&request)
        .send()
        .await
        .map_err(EmbedError::openai_network)
        .map_err(Retry::retry_later)?;

    let response = Self::check_response(response).await?;

    // Report undecodable bodies as an Ollama error, like `check_response` and `new` do,
    // rather than through the OpenAI-specific constructor.
    let response: OllamaResponse = response
        .json()
        .await
        .map_err(EmbedError::ollama_unexpected)
        .map_err(Retry::retry_later)?;

    tracing::trace!("response: {:?}", response.embedding);

    let embedding = Embeddings::from_single_embedding(response.embedding);
    Ok(embedding)
}
/// Blocking entry point: embeds every chunk of prompts on a temporary current-thread
/// runtime, sharing a single HTTP client between all chunks.
///
/// # Errors
///
/// Fails when the runtime or the client cannot be created, or with the first error
/// returned while embedding one of the chunks.
pub fn embed_chunks(
    &self,
    text_chunks: Vec<Vec<String>>,
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_io()
        .enable_time()
        .build()
        .map_err(EmbedError::openai_runtime_init)?;

    let client = self.new_client()?;

    // One future per chunk, all driven together until the first failure.
    let pending = text_chunks.into_iter().map(|prompts| self.embed(prompts, &client));
    runtime.block_on(futures::future::try_join_all(pending))
}
// Hint defaults copied from openai.rs

/// Hint for the number of chunks.
pub fn chunk_count_hint(&self) -> usize {
    const CHUNK_COUNT_HINT: usize = 10;
    CHUNK_COUNT_HINT
}

/// Hint for the number of prompts in a chunk.
pub fn prompt_count_in_chunk_hint(&self) -> usize {
    const PROMPT_COUNT_IN_CHUNK_HINT: usize = 10;
    PROMPT_COUNT_IN_CHUNK_HINT
}

/// Dimensions stored in the embedding model of this embedder's options.
pub fn dimensions(&self) -> usize {
    let model = &self.options.embedding_model;
    model.dimensions
}

/// No distribution shift is provided for Ollama embedders.
pub fn distribution(&self) -> Option<DistributionShift> {
    None
}
}
impl Display for OllamaError {
    /// Writes the error message as is, without any decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.error)
    }
}
/// Returns the URL of the Ollama embeddings endpoint.
///
/// The value of the `MEILI_OLLAMA_URL` environment variable is used when it is set and
/// valid Unicode, `http://localhost:11434/api/embeddings` otherwise.
fn get_ollama_path() -> String {
    // Important: Hostname not enough, has to be entire path to embeddings endpoint
    // The default is only allocated when the variable cannot be read.
    std::env::var("MEILI_OLLAMA_URL")
        .unwrap_or_else(|_| "http://localhost:11434/api/embeddings".to_string())
}

View File

@@ -419,12 +419,12 @@ impl Embedder {
// retrying in case of failure // retrying in case of failure
pub struct Retry { struct Retry {
pub error: EmbedError, error: EmbedError,
strategy: RetryStrategy, strategy: RetryStrategy,
} }
pub enum RetryStrategy { enum RetryStrategy {
GiveUp, GiveUp,
Retry, Retry,
RetryTokenized, RetryTokenized,
@@ -432,23 +432,23 @@ pub enum RetryStrategy {
} }
impl Retry { impl Retry {
pub fn give_up(error: EmbedError) -> Self { fn give_up(error: EmbedError) -> Self {
Self { error, strategy: RetryStrategy::GiveUp } Self { error, strategy: RetryStrategy::GiveUp }
} }
pub fn retry_later(error: EmbedError) -> Self { fn retry_later(error: EmbedError) -> Self {
Self { error, strategy: RetryStrategy::Retry } Self { error, strategy: RetryStrategy::Retry }
} }
pub fn retry_tokenized(error: EmbedError) -> Self { fn retry_tokenized(error: EmbedError) -> Self {
Self { error, strategy: RetryStrategy::RetryTokenized } Self { error, strategy: RetryStrategy::RetryTokenized }
} }
pub fn rate_limited(error: EmbedError) -> Self { fn rate_limited(error: EmbedError) -> Self {
Self { error, strategy: RetryStrategy::RetryAfterRateLimit } Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
} }
pub fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> { fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> {
match self.strategy { match self.strategy {
RetryStrategy::GiveUp => Err(self.error), RetryStrategy::GiveUp => Err(self.error),
RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))), RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))),
@@ -459,11 +459,11 @@ impl Retry {
} }
} }
pub fn must_tokenize(&self) -> bool { fn must_tokenize(&self) -> bool {
matches!(self.strategy, RetryStrategy::RetryTokenized) matches!(self.strategy, RetryStrategy::RetryTokenized)
} }
pub fn into_error(self) -> EmbedError { fn into_error(self) -> EmbedError {
self.error self.error
} }
} }

View File

@@ -1,7 +1,7 @@
use deserr::Deserr; use deserr::Deserr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::{ollama, openai}; use super::openai;
use crate::prompt::PromptData; use crate::prompt::PromptData;
use crate::update::Setting; use crate::update::Setting;
use crate::vector::EmbeddingConfig; use crate::vector::EmbeddingConfig;
@@ -80,15 +80,11 @@ impl EmbeddingSettings {
Self::SOURCE => { Self::SOURCE => {
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided] &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
} }
Self::MODEL => { Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
}
Self::REVISION => &[EmbedderSource::HuggingFace], Self::REVISION => &[EmbedderSource::HuggingFace],
Self::API_KEY => &[EmbedderSource::OpenAi], Self::API_KEY => &[EmbedderSource::OpenAi],
Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
Self::DOCUMENT_TEMPLATE => { Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
}
_other => unreachable!("unknown field"), _other => unreachable!("unknown field"),
} }
} }
@@ -105,7 +101,6 @@ impl EmbeddingSettings {
EmbedderSource::HuggingFace => { EmbedderSource::HuggingFace => {
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
} }
EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE],
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS], EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
} }
} }
@@ -139,7 +134,6 @@ pub enum EmbedderSource {
#[default] #[default]
OpenAi, OpenAi,
HuggingFace, HuggingFace,
Ollama,
UserProvided, UserProvided,
} }
@@ -149,7 +143,6 @@ impl std::fmt::Display for EmbedderSource {
EmbedderSource::OpenAi => "openAi", EmbedderSource::OpenAi => "openAi",
EmbedderSource::HuggingFace => "huggingFace", EmbedderSource::HuggingFace => "huggingFace",
EmbedderSource::UserProvided => "userProvided", EmbedderSource::UserProvided => "userProvided",
EmbedderSource::Ollama => "ollama",
}; };
f.write_str(s) f.write_str(s)
} }
@@ -202,14 +195,6 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(), dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
document_template: Setting::Set(prompt.template), document_template: Setting::Set(prompt.template),
}, },
super::EmbedderOptions::Ollama(options) => Self {
source: Setting::Set(EmbedderSource::Ollama),
model: Setting::Set(options.embedding_model.name().to_owned()),
revision: Setting::NotSet,
api_key: Setting::NotSet,
dimensions: Setting::NotSet,
document_template: Setting::Set(prompt.template),
},
super::EmbedderOptions::UserProvided(options) => Self { super::EmbedderOptions::UserProvided(options) => Self {
source: Setting::Set(EmbedderSource::UserProvided), source: Setting::Set(EmbedderSource::UserProvided),
model: Setting::NotSet, model: Setting::NotSet,
@@ -244,14 +229,6 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
} }
this.embedder_options = super::EmbedderOptions::OpenAi(options); this.embedder_options = super::EmbedderOptions::OpenAi(options);
} }
EmbedderSource::Ollama => {
let mut options: ollama::EmbedderOptions =
super::ollama::EmbedderOptions::with_default_model();
if let Some(model) = model.set() {
options.embedding_model = super::ollama::EmbeddingModel::from_name(&model);
}
this.embedder_options = super::EmbedderOptions::Ollama(options);
}
EmbedderSource::HuggingFace => { EmbedderSource::HuggingFace => {
let mut options = super::hf::EmbedderOptions::default(); let mut options = super::hf::EmbedderOptions::default();
if let Some(model) = model.set() { if let Some(model) = model.set() {

View File

@@ -1,94 +0,0 @@
{
"name": "settings-add-remove-filters.json",
"run_count": 2,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"150k-people.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
],
"filterableAttributes": [
"city",
"region",
"country_code"
],
"dictionary": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
],
"stopWords": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/peoples/documents",
"method": "POST",
"body": {
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"filterableAttributes": [
"city",
"region",
"country_code",
"featured_job_title",
"featured_job_organization_name"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"filterableAttributes": [
"city",
"region",
"country_code"
]
}
},
"synchronous": "WaitForTask"
}
]
}

View File

@@ -1,86 +0,0 @@
{
"name": "settings-proximity-precision.json",
"run_count": 2,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"150k-people.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
],
"filterableAttributes": [
"city",
"region",
"country_code",
"featured_job_title",
"featured_job_organization_name"
],
"dictionary": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
],
"stopWords": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/peoples/documents",
"method": "POST",
"body": {
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"proximityPrecision": "byAttribute"
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"proximityPrecision": "byWord"
}
},
"synchronous": "WaitForTask"
}
]
}

View File

@@ -1,114 +0,0 @@
{
"name": "settings-remove-add-swap-searchable.json",
"run_count": 2,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"150k-people.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
],
"filterableAttributes": [
"city",
"region",
"country_code",
"featured_job_title",
"featured_job_organization_name"
],
"dictionary": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
],
"stopWords": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/peoples/documents",
"method": "POST",
"body": {
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_organization_name"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"first_name",
"last_name",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
]
}
},
"synchronous": "WaitForTask"
}
]
}

View File

@@ -1,115 +0,0 @@
{
"name": "settings-typo.json",
"run_count": 2,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"150k-people.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"last_name",
"first_name",
"featured_job_title",
"featured_job_organization_name",
"facebook_url",
"twitter_url",
"linkedin_url"
],
"filterableAttributes": [
"city",
"region",
"country_code",
"featured_job_title",
"featured_job_organization_name"
],
"dictionary": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
],
"stopWords": [
"https://",
"http://",
"www.",
"crunchbase.com",
"facebook.com",
"twitter.com",
"linkedin.com"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/peoples/documents",
"method": "POST",
"body": {
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"typoTolerance": {
"disableOnAttributes": ["featured_job_organization_name"]
}
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"typoTolerance": {
"disableOnAttributes": []
}
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"typoTolerance": {
"disableOnWords": ["Ben","Elowitz","Kevin","Flaherty", "Ron", "Dustin", "Owen", "Chris", "Mark", "Matt", "Peter", "Van", "Head", "of"]
}
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/peoples/settings",
"method": "PATCH",
"body": {
"inline": {
"typoTolerance": {
"disableOnWords": []
}
}
},
"synchronous": "WaitForTask"
}
]
}

View File

@@ -11,179 +11,157 @@ use super::client::Client;
use super::env_info; use super::env_info;
use super::workload::Workload; use super::workload::Workload;
#[derive(Debug, Clone)] pub async fn cancel_on_ctrl_c(
pub enum DashboardClient { invocation_uuid: Uuid,
Client(Client), dashboard_client: Client,
Dry, abort_handle: AbortHandle,
) {
tracing::info!("press Ctrl-C to cancel the invocation");
match ctrl_c().await {
Ok(()) => {
tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
mark_as_failed(dashboard_client, invocation_uuid, None).await;
abort_handle.abort();
}
Err(error) => tracing::warn!(
error = &error as &dyn std::error::Error,
"failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
),
}
} }
impl DashboardClient { pub async fn mark_as_failed(
pub fn new(dashboard_url: &str, api_key: Option<&str>) -> anyhow::Result<Self> { dashboard_client: Client,
let dashboard_client = Client::new( invocation_uuid: Uuid,
Some(format!("{}/api/v1", dashboard_url)), failure_reason: Option<String>,
api_key, ) {
Some(std::time::Duration::from_secs(60)), let response = dashboard_client
)?; .post("cancel-invocation")
.json(&json!({
Ok(Self::Client(dashboard_client)) "invocation_uuid": invocation_uuid,
} "failure_reason": failure_reason,
}))
pub fn new_dry() -> Self { .send()
Self::Dry .await;
} let response = match response {
Ok(response) => response,
pub async fn send_machine_info(&self, env: &env_info::Environment) -> anyhow::Result<()> { Err(response_error) => {
let Self::Client(dashboard_client) = self else { return Ok(()) }; tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
return;
let response = dashboard_client
.put("machine")
.json(&json!({"hostname": env.hostname}))
.send()
.await
.context("sending machine information")?;
if !response.status().is_success() {
bail!(
"could not send machine information: {} {}",
response.status(),
response.text().await.unwrap_or_else(|_| "unknown".into())
);
} }
Ok(()) };
}
if !response.status().is_success() {
pub async fn create_invocation( tracing::error!(
&self, %invocation_uuid,
build_info: build_info::BuildInfo, "could not mark invocation as failed: {}",
commit_message: &str, response.text().await.unwrap()
env: env_info::Environment, );
max_workloads: usize, return;
reason: Option<&str>,
) -> anyhow::Result<Uuid> {
let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };
let response = dashboard_client
.put("invocation")
.json(&json!({
"commit": {
"sha1": build_info.commit_sha1,
"message": commit_message,
"commit_date": build_info.commit_timestamp,
"branch": build_info.branch,
"tag": build_info.describe.and_then(|describe| describe.as_tag()),
},
"machine_hostname": env.hostname,
"max_workloads": max_workloads,
"reason": reason
}))
.send()
.await
.context("sending invocation")?;
if !response.status().is_success() {
bail!(
"could not send new invocation: {}",
response.text().await.unwrap_or_else(|_| "unknown".into())
);
}
let invocation_uuid: Uuid =
response.json().await.context("could not deserialize invocation response as JSON")?;
Ok(invocation_uuid)
}
/// Registers `workload` on the dashboard under the invocation `invocation_uuid`.
///
/// Returns the UUID deserialized from the dashboard response. When `self` is not the
/// `Client` variant, nothing is sent and a freshly generated UUID is returned.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the dashboard answers with a non-success
/// status, or when the response body is not a JSON UUID.
pub async fn create_workload(
    &self,
    invocation_uuid: Uuid,
    workload: &Workload,
) -> anyhow::Result<Uuid> {
    let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };

    let response = dashboard_client
        .put("workload")
        .json(&json!({
            "invocation_uuid": invocation_uuid,
            "name": &workload.name,
            "max_runs": workload.run_count,
        }))
        .send()
        .await
        .context("could not create new workload")?;

    if !response.status().is_success() {
        // Reading the body may fail as well: never panic while reporting an error.
        bail!(
            "creating new workload failed: {}",
            response.text().await.unwrap_or_else(|_| "unknown".into())
        )
    }

    let workload_uuid: Uuid =
        response.json().await.context("could not deserialize JSON as UUID")?;
    Ok(workload_uuid)
}
/// Sends the `report` of a finished run of the workload `workload_uuid` to the dashboard.
///
/// Nothing is sent when `self` is the `Dry` variant.
///
/// # Errors
///
/// Fails when the request cannot be sent or when the dashboard answers with a
/// non-success status.
pub async fn create_run(
    &self,
    workload_uuid: Uuid,
    report: &BTreeMap<String, CallStats>,
) -> anyhow::Result<()> {
    let dashboard_client = match self {
        Self::Client(dashboard_client) => dashboard_client,
        Self::Dry => return Ok(()),
    };

    let payload = json!({
        "workload_uuid": workload_uuid,
        "data": report
    });
    let response =
        dashboard_client.put("run").json(&payload).send().await.context("sending new run")?;

    if response.status().is_success() {
        return Ok(());
    }

    let body = response.text().await.unwrap_or_else(|_| "unknown".into());
    bail!("sending new run failed: {}", body)
}
pub async fn cancel_on_ctrl_c(self, invocation_uuid: Uuid, abort_handle: AbortHandle) {
tracing::info!("press Ctrl-C to cancel the invocation");
match ctrl_c().await {
Ok(()) => {
tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
self.mark_as_failed(invocation_uuid, None).await;
abort_handle.abort();
}
Err(error) => tracing::warn!(
error = &error as &dyn std::error::Error,
"failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
),
}
}
pub async fn mark_as_failed(&self, invocation_uuid: Uuid, failure_reason: Option<String>) {
if let DashboardClient::Client(client) = self {
let response = client
.post("cancel-invocation")
.json(&json!({
"invocation_uuid": invocation_uuid,
"failure_reason": failure_reason,
}))
.send()
.await;
let response = match response {
Ok(response) => response,
Err(response_error) => {
tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
return;
}
};
if !response.status().is_success() {
tracing::error!(
%invocation_uuid,
"could not mark invocation as failed: {}",
response.text().await.unwrap()
);
return;
}
}
tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
} }
tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
}
/// Sends the hostname found in `env` to the dashboard's `machine` route.
///
/// # Errors
///
/// Returns an error if the request cannot be sent or if the response status is not a
/// success; in the latter case the status and the response body (when readable) are
/// included in the message.
pub async fn send_machine_info(
    dashboard_client: &Client,
    env: &env_info::Environment,
) -> anyhow::Result<()> {
    let payload = json!({"hostname": env.hostname});
    let response = dashboard_client
        .put("machine")
        .json(&payload)
        .send()
        .await
        .context("sending machine information")?;

    // Capture the status before the response is consumed by reading its body.
    let status = response.status();
    if status.is_success() {
        return Ok(());
    }

    let body = response.text().await.unwrap_or_else(|_| "unknown".into());
    bail!("could not send machine information: {} {}", status, body);
}
/// Declares a new benchmark invocation to the dashboard and returns its UUID.
///
/// The payload carries the commit description taken from `build_info` and
/// `commit_message`, the hostname from `env`, `max_workloads` and the optional `reason`.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the response status is not a
/// success, or if the response body cannot be deserialized as a UUID.
pub async fn create_invocation(
    dashboard_client: &Client,
    build_info: build_info::BuildInfo,
    commit_message: &str,
    env: env_info::Environment,
    max_workloads: usize,
    reason: Option<&str>,
) -> anyhow::Result<Uuid> {
    let tag = build_info.describe.and_then(|describe| describe.as_tag());
    let commit = json!({
        "sha1": build_info.commit_sha1,
        "message": commit_message,
        "commit_date": build_info.commit_timestamp,
        "branch": build_info.branch,
        "tag": tag,
    });
    let payload = json!({
        "commit": commit,
        "machine_hostname": env.hostname,
        "max_workloads": max_workloads,
        "reason": reason
    });

    let response = dashboard_client
        .put("invocation")
        .json(&payload)
        .send()
        .await
        .context("sending invocation")?;

    if !response.status().is_success() {
        // The body is only used for diagnostics, so an unreadable body is not fatal.
        let body = response.text().await.unwrap_or_else(|_| "unknown".into());
        bail!("could not send new invocation: {}", body);
    }

    response
        .json::<Uuid>()
        .await
        .context("could not deserialize invocation response as JSON")
}
/// Registers `workload` with the dashboard under the invocation `invocation_uuid` and
/// returns the UUID sent back by the dashboard.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the response status is not a
/// success, or if the response body cannot be deserialized as a UUID.
pub async fn create_workload(
    dashboard_client: &Client,
    invocation_uuid: Uuid,
    workload: &Workload,
) -> anyhow::Result<Uuid> {
    let response = dashboard_client
        .put("workload")
        .json(&json!({
            "invocation_uuid": invocation_uuid,
            "name": &workload.name,
            "max_runs": workload.run_count,
        }))
        .send()
        .await
        .context("could not create new workload")?;

    if !response.status().is_success() {
        // Reading the body can fail too; use a placeholder instead of panicking while
        // we are already reporting an error.
        bail!(
            "creating new workload failed: {}",
            response.text().await.unwrap_or_else(|_| "unknown".into())
        )
    }

    let workload_uuid: Uuid =
        response.json().await.context("could not deserialize JSON as UUID")?;
    Ok(workload_uuid)
}
pub async fn create_run(
dashboard_client: Client,
workload_uuid: Uuid,
report: &BTreeMap<String, CallStats>,
) -> anyhow::Result<()> {
let response = dashboard_client
.put("run")
.json(&json!({
"workload_uuid": workload_uuid,
"data": report
}))
.send()
.await
.context("sending new run")?;
if !response.status().is_success() {
bail!(
"sending new run failed: {}",
response.text().await.unwrap_or_else(|_| "unknown".into())
)
}
Ok(())
} }

View File

@@ -50,10 +50,6 @@ pub struct BenchDeriveArgs {
#[arg(long, default_value_t = default_dashboard_url())] #[arg(long, default_value_t = default_dashboard_url())]
dashboard_url: String, dashboard_url: String,
/// Don't actually send results to the dashboard
#[arg(long)]
no_dashboard: bool,
/// Directory to output reports. /// Directory to output reports.
#[arg(long, default_value_t = default_report_folder())] #[arg(long, default_value_t = default_report_folder())]
report_folder: String, report_folder: String,
@@ -107,11 +103,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
let assets_client = let assets_client =
Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
let dashboard_client = if args.no_dashboard { let dashboard_client = Client::new(
dashboard::DashboardClient::new_dry() Some(format!("{}/api/v1", args.dashboard_url)),
} else { args.api_key.as_deref(),
dashboard::DashboardClient::new(&args.dashboard_url, args.api_key.as_deref())? Some(std::time::Duration::from_secs(60)),
}; )?;
// reporting uses its own client because keeping the stream open to wait for entries // reporting uses its own client because keeping the stream open to wait for entries
// blocks any other requests // blocks any other requests
@@ -131,12 +127,12 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
// enter runtime // enter runtime
rt.block_on(async { rt.block_on(async {
dashboard_client.send_machine_info(&env).await?; dashboard::send_machine_info(&dashboard_client, &env).await?;
let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap(); let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
let max_workloads = args.workload_file.len(); let max_workloads = args.workload_file.len();
let reason: Option<&str> = args.reason.as_deref(); let reason: Option<&str> = args.reason.as_deref();
let invocation_uuid = dashboard_client.create_invocation( build_info, commit_message, env, max_workloads, reason).await?; let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?;
tracing::info!(workload_count = args.workload_file.len(), "handling workload files"); tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
@@ -171,7 +167,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
let abort_handle = workload_runs.abort_handle(); let abort_handle = workload_runs.abort_handle();
tokio::spawn({ tokio::spawn({
let dashboard_client = dashboard_client.clone(); let dashboard_client = dashboard_client.clone();
dashboard_client.cancel_on_ctrl_c(invocation_uuid, abort_handle) dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
}); });
// wait for the end of the main task, handle result // wait for the end of the main task, handle result
@@ -182,7 +178,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
} }
Ok(Err(error)) => { Ok(Err(error)) => {
tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard"); tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
dashboard_client.mark_as_failed(invocation_uuid, Some(error.to_string())).await; dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
tracing::warn!(%invocation_uuid, "invocation marked as failed following error"); tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
Err(error) Err(error)
}, },
@@ -190,7 +186,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
match join_error.try_into_panic() { match join_error.try_into_panic() {
Ok(panic) => { Ok(panic) => {
tracing::error!("invocation panicked, attempting to report the failure to dashboard"); tracing::error!("invocation panicked, attempting to report the failure to dashboard");
dashboard_client.mark_as_failed( invocation_uuid, Some("Panicked".into())).await; dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
std::panic::resume_unwind(panic) std::panic::resume_unwind(panic)
} }
Err(_) => { Err(_) => {

View File

@@ -12,9 +12,8 @@ use uuid::Uuid;
use super::assets::Asset; use super::assets::Asset;
use super::client::Client; use super::client::Client;
use super::command::SyncMode; use super::command::SyncMode;
use super::dashboard::DashboardClient;
use super::BenchDeriveArgs; use super::BenchDeriveArgs;
use crate::bench::{assets, meili_process}; use crate::bench::{assets, dashboard, meili_process};
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct Workload { pub struct Workload {
@@ -26,7 +25,7 @@ pub struct Workload {
} }
async fn run_commands( async fn run_commands(
dashboard_client: &DashboardClient, dashboard_client: &Client,
logs_client: &Client, logs_client: &Client,
meili_client: &Client, meili_client: &Client,
workload_uuid: Uuid, workload_uuid: Uuid,
@@ -65,7 +64,7 @@ async fn run_commands(
#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))] #[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
pub async fn execute( pub async fn execute(
assets_client: &Client, assets_client: &Client,
dashboard_client: &DashboardClient, dashboard_client: &Client,
logs_client: &Client, logs_client: &Client,
meili_client: &Client, meili_client: &Client,
invocation_uuid: Uuid, invocation_uuid: Uuid,
@@ -75,7 +74,8 @@ pub async fn execute(
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?; assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?; let workload_uuid =
dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;
let mut tasks = Vec::new(); let mut tasks = Vec::new();
@@ -113,7 +113,7 @@ pub async fn execute(
#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner #[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))] #[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
async fn execute_run( async fn execute_run(
dashboard_client: &DashboardClient, dashboard_client: &Client,
logs_client: &Client, logs_client: &Client,
meili_client: &Client, meili_client: &Client,
workload_uuid: Uuid, workload_uuid: Uuid,
@@ -202,7 +202,7 @@ async fn start_report(
} }
async fn stop_report( async fn stop_report(
dashboard_client: &DashboardClient, dashboard_client: &Client,
logs_client: &Client, logs_client: &Client,
workload_uuid: Uuid, workload_uuid: Uuid,
filename: String, filename: String,
@@ -232,7 +232,7 @@ async fn stop_report(
.context("could not convert trace to report")?; .context("could not convert trace to report")?;
let context = || format!("writing report to {filename}"); let context = || format!("writing report to {filename}");
dashboard_client.create_run(workload_uuid, &report).await?; dashboard::create_run(dashboard_client, workload_uuid, &report).await?;
let mut output_file = std::io::BufWriter::new( let mut output_file = std::io::BufWriter::new(
std::fs::File::options() std::fs::File::options()