Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-01 18:25:37 +00:00)

Compare commits: 15 commits, prototype-... ... prototype-...
| Author | SHA1 | Date |
|---|---|---|
| | 4a21fecf67 | |
| | ae8e69c030 | |
| | 3dda93d50f | |
| | 117146ec4e | |
| | 884b4d47b1 | |
| | 023cb0c2de | |
| | 9d5e3457e5 | |
| | 04694071fe | |
| | b0c1a9504a | |
| | d57026cd96 | |
| | 41c9e8856a | |
| | d4ff59fcf5 | |
| | 9c485f8563 | |
| | d8d12d5979 | |
| | 0597a97c84 | |
.github/workflows/publish-apt-brew-pkg.yml (vendored): 2 changed lines

@@ -35,7 +35,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.6.1
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
.github/workflows/publish-binaries.yml (vendored): 8 changed lines

@@ -54,7 +54,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.6.1
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.6.1
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}
@@ -121,7 +121,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.6.1
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.6.1
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
.github/workflows/test-suite.yml (vendored): 10 changed lines

@@ -43,7 +43,7 @@ jobs:
           toolchain: nightly
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.5.0
+        uses: Swatinem/rust-cache@v2.5.1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -65,7 +65,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.5.0
+        uses: Swatinem/rust-cache@v2.5.1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -146,7 +146,7 @@ jobs:
           toolchain: stable
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.5.0
+        uses: Swatinem/rust-cache@v2.5.1
       - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@@ -165,7 +165,7 @@ jobs:
           override: true
           components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.5.0
+        uses: Swatinem/rust-cache@v2.5.1
       - name: Run cargo clippy
         uses: actions-rs/cargo@v1
         with:
@@ -184,7 +184,7 @@ jobs:
           override: true
           components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.5.0
+        uses: Swatinem/rust-cache@v2.5.1
       - name: Run cargo fmt
         # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
         # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
Cargo.lock (generated): 433 changed lines
@@ -9,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"memchr",
|
||||
@@ -49,7 +49,7 @@ dependencies = [
|
||||
"base64 0.21.2",
|
||||
"bitflags 1.3.2",
|
||||
"brotli",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"bytestring",
|
||||
"derive_more",
|
||||
"encoding_rs",
|
||||
@@ -119,7 +119,7 @@ dependencies = [
|
||||
"actix-utils",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"mio 0.8.7",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"socket2",
|
||||
"tokio",
|
||||
@@ -182,9 +182,9 @@ dependencies = [
|
||||
"actix-utils",
|
||||
"actix-web-codegen",
|
||||
"ahash 0.7.6",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"bytestring",
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"cookie",
|
||||
"derive_more",
|
||||
"encoding_rs",
|
||||
@@ -251,7 +251,7 @@ version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "433cfd6710c9986c576a25ca913c39d66a6474107b406f34f91d4a8923395241"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
]
|
||||
@@ -273,7 +273,7 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
@@ -428,54 +428,12 @@ dependencies = [
|
||||
"critical-section",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "attohttpc"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fcf00bc6d5abb29b5f97e3c61a90b6d3caa12f3faf897d4a3e3607c050a35a7"
|
||||
dependencies = [
|
||||
"http",
|
||||
"log",
|
||||
"rustls 0.20.8",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"url",
|
||||
"webpki",
|
||||
"webpki-roots 0.22.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "aws-creds"
|
||||
version = "0.34.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3776743bb68d4ad02ba30ba8f64373f1be4e082fe47651767171ce75bb2f6cf5"
|
||||
dependencies = [
|
||||
"attohttpc",
|
||||
"dirs",
|
||||
"log",
|
||||
"quick-xml",
|
||||
"rust-ini",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"time",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-region"
|
||||
version = "0.25.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "056557a61427d0e5ba29dd931031c8ffed4ee7a550e7cd55692a9d8deb0a9dba"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.67"
|
||||
@@ -484,7 +442,7 @@ checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide 0.6.2",
|
||||
"object",
|
||||
@@ -514,7 +472,7 @@ name = "benchmarks"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"convert_case 0.6.0",
|
||||
"criterion",
|
||||
"csv",
|
||||
@@ -654,12 +612,6 @@ version = "1.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.4.0"
|
||||
@@ -672,7 +624,7 @@ version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -730,12 +682,6 @@ dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@@ -951,7 +897,7 @@ version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1002,7 +948,7 @@ version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-epoch",
|
||||
@@ -1016,7 +962,7 @@ version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
@@ -1026,7 +972,7 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
@@ -1038,7 +984,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
@@ -1050,7 +996,7 @@ version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
@@ -1060,7 +1006,7 @@ version = "0.8.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1230,36 +1176,16 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "4.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
|
||||
dependencies = [
|
||||
"dirs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-next"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf36e65a80337bea855cd4ef9b8401ffce06a7baedf2e85ec467b1ac3f6e82b6"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"dirs-sys-next",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys-next"
|
||||
version = "0.1.2"
|
||||
@@ -1268,15 +1194,9 @@ checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dlv-list"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257"
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.3.0"
|
||||
@@ -1299,14 +1219,14 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
"uuid 1.4.1",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.9.0"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
@@ -1387,7 +1307,7 @@ version = "0.8.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1498,7 +1418,7 @@ dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"uuid 1.4.1",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1507,7 +1427,7 @@ version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall 0.2.16",
|
||||
"windows-sys 0.48.0",
|
||||
@@ -1561,22 +1481,6 @@ version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"fuchsia-zircon-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon-sys"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.28"
|
||||
@@ -1710,7 +1614,7 @@ version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
@@ -1769,7 +1673,7 @@ version = "0.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"fnv",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
@@ -1901,7 +1805,7 @@ version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"fnv",
|
||||
"itoa",
|
||||
]
|
||||
@@ -1912,7 +1816,7 @@ version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"http",
|
||||
"pin-project-lite",
|
||||
]
|
||||
@@ -1941,7 +1845,7 @@ version = "0.14.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
@@ -2008,19 +1912,15 @@ dependencies = [
|
||||
"meilisearch-types",
|
||||
"nelson",
|
||||
"page_size 0.5.0",
|
||||
"parking_lot",
|
||||
"puffin",
|
||||
"roaring",
|
||||
"rust-s3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"synchronoise",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
"tokio",
|
||||
"uuid 1.4.1",
|
||||
"zookeeper",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2075,7 +1975,7 @@ version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2104,15 +2004,6 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iovec"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.7.2"
|
||||
@@ -2211,16 +2102,6 @@ dependencies = [
|
||||
"simple_asn1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
dependencies = [
|
||||
"winapi 0.2.8",
|
||||
"winapi-build",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "language-tags"
|
||||
version = "0.3.2"
|
||||
@@ -2233,12 +2114,6 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.2.1"
|
||||
@@ -2617,17 +2492,6 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "maybe-async"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f1b8c13cb1f814b634a96b2c725449fe7ed464a7b8781de8688be5ffbd3f305"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "md5"
|
||||
version = "0.7.0"
|
||||
@@ -2660,7 +2524,7 @@ dependencies = [
|
||||
"brotli",
|
||||
"bstr",
|
||||
"byte-unit",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"cargo_toml",
|
||||
"clap",
|
||||
"crossbeam-channel",
|
||||
@@ -2705,7 +2569,6 @@ dependencies = [
|
||||
"rayon",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"rust-s3",
|
||||
"rustls 0.20.8",
|
||||
"rustls-pemfile",
|
||||
"segment",
|
||||
@@ -2728,12 +2591,11 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
"urlencoding",
|
||||
"uuid 1.4.1",
|
||||
"uuid 1.3.3",
|
||||
"vergen",
|
||||
"walkdir",
|
||||
"yaup",
|
||||
"zip",
|
||||
"zookeeper",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2743,7 +2605,6 @@ dependencies = [
|
||||
"base64 0.21.2",
|
||||
"enum-iterator",
|
||||
"hmac",
|
||||
"log",
|
||||
"maplit",
|
||||
"meilisearch-types",
|
||||
"rand",
|
||||
@@ -2753,8 +2614,7 @@ dependencies = [
|
||||
"sha2",
|
||||
"thiserror",
|
||||
"time",
|
||||
"uuid 1.4.1",
|
||||
"zookeeper",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2784,7 +2644,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"time",
|
||||
"tokio",
|
||||
"uuid 1.4.1",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2864,7 +2724,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
"uuid 1.4.1",
|
||||
"uuid 1.3.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2916,25 +2776,6 @@ dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.6.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"fuchsia-zircon",
|
||||
"fuchsia-zircon-sys",
|
||||
"iovec",
|
||||
"kernel32-sys",
|
||||
"libc",
|
||||
"log",
|
||||
"miow",
|
||||
"net2",
|
||||
"slab",
|
||||
"winapi 0.2.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.7"
|
||||
@@ -2947,46 +2788,11 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio-extras"
|
||||
version = "2.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52403fe290012ce777c4626790c8951324a2b9e3316b3143779c72b029742f19"
|
||||
dependencies = [
|
||||
"lazycell",
|
||||
"log",
|
||||
"mio 0.6.23",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miow"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d"
|
||||
dependencies = [
|
||||
"kernel32-sys",
|
||||
"net2",
|
||||
"winapi 0.2.8",
|
||||
"ws2_32-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nelson"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a"
|
||||
|
||||
[[package]]
|
||||
name = "net2"
|
||||
version = "0.2.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b13b648036a2339d06de780866fbdfda0dde886de7b3af2ddeba8b14f4ee34ac"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"libc",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
@@ -3014,7 +2820,7 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
|
||||
dependencies = [
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3094,16 +2900,6 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-multimap"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a"
|
||||
dependencies = [
|
||||
"dlv-list",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "page_size"
|
||||
version = "0.4.2"
|
||||
@@ -3111,7 +2907,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3121,7 +2917,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3140,7 +2936,7 @@ version = "0.9.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall 0.2.16",
|
||||
"smallvec",
|
||||
@@ -3415,7 +3211,7 @@ version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
@@ -3441,7 +3237,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"instant",
|
||||
"lz4_flex",
|
||||
"once_cell",
|
||||
@@ -3461,16 +3257,6 @@ dependencies = [
|
||||
"puffin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.30"
|
||||
@@ -3601,7 +3387,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
@@ -3651,7 +3437,7 @@ dependencies = [
|
||||
"spin 0.5.2",
|
||||
"untrusted",
|
||||
"web-sys",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3678,45 +3464,6 @@ dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-ini"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"ordered-multimap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-s3"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b2ac5ff6acfbe74226fa701b5ef793aaa054055c13ebb7060ad36942956e027"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"attohttpc",
|
||||
"aws-creds",
|
||||
"aws-region",
|
||||
"base64 0.13.1",
|
||||
"bytes 1.4.0",
|
||||
"cfg-if 1.0.0",
|
||||
"hex",
|
||||
"hmac",
|
||||
"http",
|
||||
"log",
|
||||
"maybe-async",
|
||||
"md5",
|
||||
"percent-encoding",
|
||||
"quick-xml",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"sha2",
|
||||
"thiserror",
|
||||
"time",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
@@ -3950,7 +3697,7 @@ version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
@@ -3961,7 +3708,7 @@ version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
@@ -3972,7 +3719,7 @@ version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
@@ -4052,12 +3799,6 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snowflake"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "27207bb65232eda1f588cf46db2fee75c0808d557f6b3cf19a75f5d6d7c94df1"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.9"
|
||||
@@ -4065,7 +3806,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4167,13 +3908,13 @@ version = "0.28.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4c2f3ca6693feb29a89724516f016488e9aafc7f37264f898593ee4b942f31b"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
"ntapi",
|
||||
"once_cell",
|
||||
"rayon",
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4202,7 +3943,7 @@ version = "3.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"redox_syscall 0.3.5",
|
||||
"rustix 0.37.19",
|
||||
@@ -4297,9 +4038,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"libc",
|
||||
"mio 0.8.7",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
@@ -4358,7 +4099,7 @@ version = "0.7.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d"
|
||||
dependencies = [
|
||||
"bytes 1.4.0",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"pin-project-lite",
|
||||
@@ -4412,7 +4153,7 @@ version = "0.1.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"log",
|
||||
"pin-project-lite",
|
||||
"tracing-core",
|
||||
@@ -4548,9 +4289,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.4.1"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d"
|
||||
checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"serde",
|
||||
@@ -4569,7 +4310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"enum-iterator",
|
||||
"getset",
|
||||
"git2",
|
||||
@@ -4627,7 +4368,7 @@ version = "0.2.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
@@ -4652,7 +4393,7 @@ version = "0.4.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
@@ -4735,12 +4476,6 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
@@ -4751,12 +4486,6 @@ dependencies = [
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-build"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
@@ -4769,7 +4498,7 @@ version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||
dependencies = [
|
||||
"winapi 0.3.9",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4925,17 +4654,7 @@ version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
|
||||
dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ws2_32-sys"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e"
|
||||
dependencies = [
|
||||
"winapi 0.2.8",
|
||||
"winapi-build",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5013,32 +4732,6 @@ dependencies = [
|
||||
"zstd 0.11.2+zstd.1.5.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zookeeper"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2312b424380193701a7341cec0551b80d2e3afd827ea0d3440af67899156ce10"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"bytes 0.5.6",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"mio 0.6.23",
|
||||
"mio-extras",
|
||||
"snowflake",
|
||||
"zookeeper_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zookeeper_derive"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42307291e3c8b2e4082e5647572da863f0470511d0ecb1618a4cd0a361549723"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.11.2+zstd.1.5.2"
|
||||
|
||||
Cargo.toml: 21 changed lines

@@ -36,3 +36,24 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
+
+[profile.dev.package.lindera-ipadic-builder]
+opt-level = 3
+[profile.dev.package.encoding]
+opt-level = 3
+[profile.dev.package.yada]
+opt-level = 3
+
+[profile.release.package.lindera-ipadic-builder]
+opt-level = 3
+[profile.release.package.encoding]
+opt-level = 3
+[profile.release.package.yada]
+opt-level = 3
+
+[profile.bench.package.lindera-ipadic-builder]
+opt-level = 3
+[profile.bench.package.encoding]
+opt-level = 3
+[profile.bench.package.yada]
+opt-level = 3
@@ -1,59 +0,0 @@
-version: "3.9"
-services:
-  zk1:
-    container_name: zk1
-    hostname: zk1
-    image: bitnami/zookeeper:3.7.1
-    ports:
-      - 21811:2181
-    environment:
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_SERVER_ID=1
-      - ZOO_SERVERS=0.0.0.0:2888:3888,zk2:2888:3888,zk3:2888:3888
-  zk2:
-    container_name: zk2
-    hostname: zk2
-    image: bitnami/zookeeper:3.7.1
-    ports:
-      - 21812:2181
-    environment:
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_SERVER_ID=2
-      - ZOO_SERVERS=zk1:2888:3888,0.0.0.0:2888:3888,zk3:2888:3888
-  zk3:
-    container_name: zk3
-    hostname: zk3
-    image: bitnami/zookeeper:3.7.1
-    ports:
-      - 21813:2181
-    environment:
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_SERVER_ID=3
-      - ZOO_SERVERS=zk1:2888:3888,zk2:2888:3888,0.0.0.0:2888:3888
-  zoonavigator:
-    container_name: zoonavigator
-    image: elkozmon/zoonavigator
-    ports:
-      - 9000:9000
-
-  # Meilisearch instances
-  # m1:
-  #   container_name: m1
-  #   hostname: m1
-  #   image: getmeili/meilisearch:prototype-zookeeper-ha-0
-  #   ports:
-  #     - 7700:7700
-  #   environment:
-  #     - MEILI_ZK_URL=zk1:2181
-  #     - MEILI_MASTER_KEY=masterkey
-  #   restart: always
-  # m2:
-  #   container_name: m2
-  #   hostname: m2
-  #   image: getmeili/meilisearch:prototype-zookeeper-ha-0
-  #   ports:
-  #     - 7701:7700
-  #   environment:
-  #     - MEILI_ZK_URL=zk2:2181
-  #     - MEILI_MASTER_KEY=masterkey
-  #   restart: always
@@ -262,6 +262,9 @@ pub(crate) mod test {
             sortable_attributes: Setting::Set(btreeset! { S("age") }),
             ranking_rules: Setting::NotSet,
             stop_words: Setting::NotSet,
+            non_separator_tokens: Setting::NotSet,
+            separator_tokens: Setting::NotSet,
+            dictionary: Setting::NotSet,
             synonyms: Setting::NotSet,
             distinct_attribute: Setting::NotSet,
             typo_tolerance: Setting::NotSet,

@@ -340,6 +340,9 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                 }
             },
             stop_words: settings.stop_words.into(),
+            non_separator_tokens: v6::Setting::NotSet,
+            separator_tokens: v6::Setting::NotSet,
+            dictionary: v6::Setting::NotSet,
             synonyms: settings.synonyms.into(),
             distinct_attribute: settings.distinct_attribute.into(),
             typo_tolerance: match settings.typo_tolerance {
@@ -22,6 +22,20 @@ pub enum Error {
 
 pub type Result<T> = std::result::Result<T, Error>;
 
+impl Deref for File {
+    type Target = NamedTempFile;
+
+    fn deref(&self) -> &Self::Target {
+        &self.file
+    }
+}
+
+impl DerefMut for File {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.file
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct FileStore {
     path: PathBuf,

@@ -132,20 +146,6 @@ impl File {
     }
 }
 
-impl Deref for File {
-    type Target = NamedTempFile;
-
-    fn deref(&self) -> &Self::Target {
-        &self.file
-    }
-}
-
-impl DerefMut for File {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.file
-    }
-}
-
 #[cfg(test)]
 mod test {
     use std::io::Write;
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-function is_everything_installed {
-    everything_ok=yes
-
-    if hash zkli 2>/dev/null; then
-        echo "✅ zkli installed"
-    else
-        everything_ok=no
-        echo "🥺 zkli is missing, please run \`cargo install zkli\`"
-    fi
-
-    if hash s3cmd 2>/dev/null; then
-        echo "✅ s3cmd installed"
-    else
-        everything_ok=no
-        echo "🥺 s3cmd is missing, see how to install it here https://s3tools.org/s3cmd"
-    fi
-
-    if [ $everything_ok = "no" ]; then
-        echo "Exiting..."
-        exit 1
-    fi
-}
-
-# param: addr of zookeeper
-function connect_to_zookeeper {
-    if ! zkli -a "$1" ls > /dev/null; then
-        echo "zkli can't connect"
-        return 1
-    fi
-}
-
-# param: addr of the s3 bucket
-function connect_to_s3 {
-    # S3_SECRET_KEY
-    # S3_ACCESS_KEY
-    # S3_HOST
-    # S3_BUCKET
-
-    s3cmd --host="$S3_HOST" --host-bucket="$S3_BUCKET" --access_key="$ACCESS_KEY" --secret_key="$S3_SECRET_KEY" ls
-
-    if $?; then
-        echo "s3cmd can't connect"
-        return 1
-    fi
-}
-
-is_everything_installed
-
-ZOOKEEPER_ADDR="localhost:2181"
-if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
-    ZOOKEEPER_ADDR="localhost:21811"
-    if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
-        echo "Can't connect to zkli"
-        exit 1
-    fi
-fi
-
-
-connect_to_s3
@@ -31,10 +31,6 @@ tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 uuid = { version = "1.3.1", features = ["serde", "v4"] }
-tokio = { version = "1.27.0", features = ["full"] }
-zookeeper = "0.8.0"
-parking_lot = "0.12.1"
-rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] }
 
 [dev-dependencies]
 big_s = "1.0.2"
@@ -43,7 +43,7 @@ use uuid::Uuid;
|
||||
|
||||
use crate::autobatcher::{self, BatchKind};
|
||||
use crate::utils::{self, swap_index_uid_in_task};
|
||||
use crate::{Error, IndexSchedulerInner, ProcessingTasks, Result, TaskId};
|
||||
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
@@ -198,35 +198,6 @@ impl Batch {
|
||||
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the content fields uuids associated with this batch.
|
||||
pub fn content_uuids(&self) -> Vec<Uuid> {
|
||||
match self {
|
||||
Batch::TaskCancelation { .. }
|
||||
| Batch::TaskDeletion(_)
|
||||
| Batch::Dump(_)
|
||||
| Batch::IndexCreation { .. }
|
||||
| Batch::IndexDocumentDeletionByFilter { .. }
|
||||
| Batch::IndexUpdate { .. }
|
||||
| Batch::SnapshotCreation(_)
|
||||
| Batch::IndexDeletion { .. }
|
||||
| Batch::IndexSwap { .. } => vec![],
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { operations, .. } => operations
|
||||
.iter()
|
||||
.flat_map(|op| match op {
|
||||
DocumentOperation::Add(uuid) => Some(*uuid),
|
||||
DocumentOperation::Delete(_) => None,
|
||||
})
|
||||
.collect(),
|
||||
IndexOperation::DocumentDeletion { .. }
|
||||
| IndexOperation::Settings { .. }
|
||||
| IndexOperation::DocumentClear { .. }
|
||||
| IndexOperation::SettingsAndDocumentOperation { .. }
|
||||
| IndexOperation::DocumentClearAndSetting { .. } => vec![],
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
@@ -242,7 +213,7 @@ impl IndexOperation {
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexSchedulerInner {
|
||||
impl IndexScheduler {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
/// ## Arguments
|
||||
@@ -509,7 +480,8 @@ impl IndexSchedulerInner {
|
||||
if let Some(task_id) = to_cancel.max() {
|
||||
// We retrieve the tasks that were processing before this tasks cancelation started.
|
||||
// We must *not* reset the processing tasks before calling this method.
|
||||
let ProcessingTasks { started_at, processing, .. } = &*self.processing_tasks.read();
|
||||
let ProcessingTasks { started_at, processing } =
|
||||
&*self.processing_tasks.read().unwrap();
|
||||
return Ok(Some(Batch::TaskCancelation {
|
||||
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
|
||||
previous_started_at: *started_at,
|
||||
@@ -1420,7 +1392,7 @@ impl IndexSchedulerInner {
|
||||
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().processing.clone();
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
|
||||
let all_task_ids = self.all_task_ids(wtxn)?;
|
||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||
|
||||
@@ -295,11 +295,6 @@ impl IndexMap {
|
||||
"Attempt to finish deletion of an index that was being closed"
|
||||
);
|
||||
}
|
||||
|
||||
/// Returns the indexes that were opened by the `IndexMap`.
|
||||
pub fn clear(&mut self) -> Vec<Index> {
|
||||
self.available.clear().into_iter().map(|(_, (_, index))| index).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create or open an index in the specified path.
|
||||
@@ -340,8 +335,7 @@ mod tests {
|
||||
impl IndexMapper {
|
||||
fn test() -> (Self, Env, IndexSchedulerHandle) {
|
||||
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
|
||||
let index_scheduler = index_scheduler.inner();
|
||||
(index_scheduler.index_mapper.clone(), index_scheduler.env.clone(), handle)
|
||||
(index_scheduler.index_mapper, index_scheduler.env, handle)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ pub struct IndexMapper {
|
||||
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
|
||||
|
||||
/// Path to the folder where the LMDB environments of each index are.
|
||||
pub(crate) base_path: PathBuf,
|
||||
base_path: PathBuf,
|
||||
/// The map size an index is opened with on the first time.
|
||||
index_base_map_size: usize,
|
||||
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
|
||||
@@ -135,7 +135,7 @@ impl IndexMapper {
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: Arc<IndexerConfig>,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
@@ -150,7 +150,7 @@ impl IndexMapper {
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -428,11 +428,6 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the indexes that were opened by the `IndexMapper`.
|
||||
pub fn clear(&mut self) -> Vec<Index> {
|
||||
self.index_map.write().unwrap().clear()
|
||||
}
|
||||
|
||||
/// The stats of an index.
|
||||
///
|
||||
/// If available in the cache, they are directly returned.
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
use std::ops::Deref;
|
||||
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
@@ -9,13 +8,12 @@ use meilisearch_types::tasks::{Details, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, IndexSchedulerInner, Kind, Status, BEI128};
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let inner = scheduler.inner();
|
||||
let IndexSchedulerInner {
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
@@ -40,15 +38,13 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
zookeeper: _,
|
||||
options: _,
|
||||
} = inner.deref();
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let processing_tasks = processing_tasks.read().processing.clone();
|
||||
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str("### Processing Tasks:\n");
|
||||
snap.push_str(&snapshot_bitmap(&processing_tasks));
|
||||
|
||||
File diff suppressed because it is too large
@@ -1,6 +1,5 @@
 //! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
 
-use std::mem;
 use std::sync::atomic::{AtomicU64, Ordering};
 
 /// Thread-safe `Vec`-backend LRU cache

@@ -191,11 +190,6 @@ where
         }
         None
     }
-
-    /// Returns the generation associated to the key and values of the `LruMap`.
-    pub fn clear(&mut self) -> Vec<(AtomicU64, (K, V))> {
-        mem::take(&mut self.0.data)
-    }
 }
 
 /// The result of an insertion in a LRU map.
@@ -10,9 +10,9 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{Error, IndexSchedulerInner, Result, Task, TaskId, BEI128};
|
||||
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
|
||||
|
||||
impl IndexSchedulerInner {
|
||||
impl IndexScheduler {
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
@@ -331,12 +331,11 @@ pub fn clamp_to_page_size(size: usize) -> usize {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl crate::IndexScheduler {
|
||||
impl IndexScheduler {
|
||||
/// Asserts that the index scheduler's content is internally consistent.
|
||||
pub fn assert_internally_consistent(&self) {
|
||||
let this = self.inner();
|
||||
let rtxn = this.env.read_txn().unwrap();
|
||||
for task in this.all_tasks.iter(&rtxn).unwrap() {
|
||||
let rtxn = self.env.read_txn().unwrap();
|
||||
for task in self.all_tasks.iter(&rtxn).unwrap() {
|
||||
let (task_id, task) = task.unwrap();
|
||||
let task_id = task_id.get();
|
||||
|
||||
@@ -355,21 +354,21 @@ impl crate::IndexScheduler {
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if let Some(task_index_uid) = &task_index_uid {
|
||||
assert!(this
|
||||
assert!(self
|
||||
.index_tasks
|
||||
.get(&rtxn, task_index_uid.as_str())
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.contains(task.uid));
|
||||
}
|
||||
let db_enqueued_at = this
|
||||
let db_enqueued_at = self
|
||||
.enqueued_at
|
||||
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_enqueued_at.contains(task_id));
|
||||
if let Some(started_at) = started_at {
|
||||
let db_started_at = this
|
||||
let db_started_at = self
|
||||
.started_at
|
||||
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
@@ -377,7 +376,7 @@ impl crate::IndexScheduler {
|
||||
assert!(db_started_at.contains(task_id));
|
||||
}
|
||||
if let Some(finished_at) = finished_at {
|
||||
let db_finished_at = this
|
||||
let db_finished_at = self
|
||||
.finished_at
|
||||
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
@@ -385,9 +384,9 @@ impl crate::IndexScheduler {
|
||||
assert!(db_finished_at.contains(task_id));
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
let db_canceled_tasks = this.get_status(&rtxn, Status::Canceled).unwrap();
|
||||
let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap();
|
||||
assert!(db_canceled_tasks.contains(uid));
|
||||
let db_canceling_task = this.get_task(&rtxn, canceled_by).unwrap().unwrap();
|
||||
let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap();
|
||||
assert_eq!(db_canceling_task.status, Status::Succeeded);
|
||||
match db_canceling_task.kind {
|
||||
KindWithContent::TaskCancelation { query: _, tasks } => {
|
||||
@@ -428,7 +427,7 @@ impl crate::IndexScheduler {
|
||||
Details::IndexInfo { primary_key: pk1 } => match &kind {
|
||||
KindWithContent::IndexCreation { index_uid, primary_key: pk2 }
|
||||
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => {
|
||||
this.index_tasks
|
||||
self.index_tasks
|
||||
.get(&rtxn, index_uid.as_str())
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
@@ -536,23 +535,23 @@ impl crate::IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
assert!(this.get_status(&rtxn, status).unwrap().contains(uid));
|
||||
assert!(this.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
|
||||
assert!(self.get_status(&rtxn, status).unwrap().contains(uid));
|
||||
assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
|
||||
|
||||
if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind {
|
||||
match status {
|
||||
Status::Enqueued | Status::Processing => {
|
||||
assert!(this
|
||||
assert!(self
|
||||
.file_store
|
||||
.all_uuids()
|
||||
.unwrap()
|
||||
.any(|uuid| uuid.as_ref().unwrap() == &content_file),
|
||||
"Could not find uuid `{content_file}` in the file_store. Available uuids are {:?}.",
|
||||
this.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
|
||||
self.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
|
||||
);
|
||||
}
|
||||
Status::Succeeded | Status::Failed | Status::Canceled => {
|
||||
assert!(this
|
||||
assert!(self
|
||||
.file_store
|
||||
.all_uuids()
|
||||
.unwrap()
|
||||
|
||||
@@ -14,7 +14,6 @@ license.workspace = true
 base64 = "0.21.0"
 enum-iterator = "1.4.0"
 hmac = "0.12.1"
-log = "0.4.19"
 maplit = "1.0.2"
 meilisearch-types = { path = "../meilisearch-types" }
 rand = "0.8.5"
@@ -25,4 +24,3 @@ sha2 = "0.10.6"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 uuid = { version = "1.3.1", features = ["serde", "v4"] }
-zookeeper = "0.8.0"
@@ -19,7 +19,6 @@ internal_error!(
     AuthControllerError: meilisearch_types::milli::heed::Error,
     std::io::Error,
     serde_json::Error,
-    zookeeper::ZkError,
     std::str::Utf8Error
 );
@@ -16,113 +16,22 @@ pub use store::open_auth_store_env;
|
||||
use store::{generate_key_as_hexa, HeedAuthStore};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
use zookeeper::{
|
||||
Acl, AddWatchMode, CreateMode, WatchedEvent, WatchedEventType, ZkError, ZooKeeper,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AuthController {
|
||||
store: Arc<HeedAuthStore>,
|
||||
master_key: Option<String>,
|
||||
zookeeper: Option<Arc<ZooKeeper>>,
|
||||
}
|
||||
|
||||
impl AuthController {
|
||||
pub fn new(
|
||||
db_path: impl AsRef<Path>,
|
||||
master_key: &Option<String>,
|
||||
zookeeper: Option<Arc<ZooKeeper>>,
|
||||
) -> Result<Self> {
|
||||
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
|
||||
let store = HeedAuthStore::new(db_path)?;
|
||||
let controller = Self { store: Arc::new(store), master_key: master_key.clone(), zookeeper };
|
||||
|
||||
match controller.zookeeper {
|
||||
// setup the auth zk environment, the `auth` node
|
||||
Some(ref zookeeper) => {
|
||||
// Zookeeper Event listener loop
|
||||
let controller_clone = controller.clone();
|
||||
let zkk = zookeeper.clone();
|
||||
zookeeper.add_watch("/auth", AddWatchMode::PersistentRecursive, move |event| {
|
||||
let WatchedEvent { event_type, path, keeper_state: _ } = dbg!(event);
|
||||
|
||||
match event_type {
|
||||
WatchedEventType::NodeDeleted => {
|
||||
// TODO: ugly unwraps
|
||||
let path = path.unwrap();
|
||||
let uuid = path.strip_prefix("/auth/").unwrap();
|
||||
let uuid = Uuid::parse_str(&uuid).unwrap();
|
||||
log::info!("The key {} has been deleted", uuid);
|
||||
controller_clone.store.delete_api_key(uuid).unwrap();
|
||||
}
|
||||
WatchedEventType::NodeCreated | WatchedEventType::NodeDataChanged => {
|
||||
let path = path.unwrap();
|
||||
if path.strip_prefix("/auth/").map_or(false, |s| !s.is_empty()) {
|
||||
let (key, _stat) = zkk.get_data(&path, false).unwrap();
|
||||
let key: Key = serde_json::from_slice(&key).unwrap();
|
||||
log::info!("The key {} has been deleted", key.uid);
|
||||
controller_clone.store.put_api_key(key).unwrap();
|
||||
}
|
||||
}
|
||||
otherwise => panic!("Got the unexpected `{otherwise:?}` event!"),
|
||||
}
|
||||
})?;
|
||||
|
||||
// TODO: we should catch the potential unexpected errors here https://docs.rs/zookeeper-client/latest/zookeeper_client/struct.Client.html#method.create
|
||||
// for the moment we consider that `create` only returns Error::NodeExists.
|
||||
match zookeeper.create(
|
||||
"/auth",
|
||||
vec![],
|
||||
Acl::open_unsafe().clone(),
|
||||
CreateMode::Persistent,
|
||||
) {
|
||||
// If the store is empty, we must generate and push the default api-keys.
|
||||
Ok(_) => generate_default_keys(&controller)?,
|
||||
// If the node exists, we should clear our DB and download all the existing api-keys
|
||||
Err(ZkError::NodeExists) => {
|
||||
log::warn!("The auth directory already exists; we need to clear our keys and import the ones from zookeeper");
|
||||
|
||||
let store = controller.store.clone();
|
||||
store.delete_all_keys()?;
|
||||
let children = zookeeper
|
||||
.get_children("/auth", false)
|
||||
.expect("Internal, the auth directory was deleted during execution.");
|
||||
|
||||
log::info!("Importing {} api-keys", children.len());
|
||||
for path in children {
|
||||
log::info!(" Importing {}", path);
|
||||
match zookeeper.get_data(&format!("/auth/{}", &path), false) {
|
||||
Ok((key, _stat)) => {
|
||||
let key = serde_json::from_slice(&key).unwrap();
|
||||
let store = controller.store.clone();
|
||||
store.put_api_key(key)?;
|
||||
}
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
// else the file was deleted while we were inserting the key. We ignore it.
|
||||
// TODO: What happens if someone updates the files before we have the time
// to set up the watcher?
|
||||
}
|
||||
}
|
||||
e @ Err(
|
||||
ZkError::NoNode | ZkError::NoChildrenForEphemerals | ZkError::InvalidACL,
|
||||
) => unreachable!("{e:?}"),
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
// TODO: Race condition above:
// What happens if two nodes join at exactly the same moment?
// One of them will create the dir.
// The second one will delete its DB, load nothing, and install a watcher.
// The first one will push its keys, and the second one should wake up and import them.
// BUT, if the second one deletes its DB and the first one pushes its keys before the
// second one installs the watcher, those keys are lost for the second node.
|
||||
}
|
||||
None => {
|
||||
if controller.store.is_empty()? {
|
||||
generate_default_keys(&controller)?;
|
||||
}
|
||||
}
|
||||
if store.is_empty()? {
|
||||
generate_default_keys(&store)?;
|
||||
}
|
||||
|
||||
Ok(controller)
|
||||
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
|
||||
}
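A minimal sketch of the watch-before-create ordering the function above relies on, using only the `zookeeper` crate calls it already makes (`add_watch`, `create`, `get_children`, `get_data`); `import_key` is a hypothetical callback standing in for `store.put_api_key`, and error handling is deliberately simplified:

use std::sync::Arc;
use zookeeper::{Acl, AddWatchMode, CreateMode, WatchedEventType, ZkError, ZooKeeper};

// Sketch: keep a local key store in sync with the `/auth` directory.
fn sync_auth_keys(
    zk: &Arc<ZooKeeper>,
    import_key: Arc<dyn Fn(&str, &[u8]) + Send + Sync>,
) -> Result<(), ZkError> {
    // 1. Install the recursive watch first, so no key pushed after this point can be missed.
    let zk_watch = zk.clone();
    let import_watch = import_key.clone();
    zk.add_watch("/auth", AddWatchMode::PersistentRecursive, move |event| {
        if let WatchedEventType::NodeCreated | WatchedEventType::NodeDataChanged = event.event_type {
            if let Some(path) = event.path {
                if let Some(uid) = path.strip_prefix("/auth/") {
                    if let Ok((data, _stat)) = zk_watch.get_data(&path, false) {
                        import_watch(uid, &data);
                    }
                }
            }
        }
    })?;

    // 2. Only then create the directory, or import the keys that already exist.
    match zk.create("/auth", vec![], Acl::open_unsafe().clone(), CreateMode::Persistent) {
        Ok(_) => { /* fresh directory: the caller pushes the default keys */ }
        Err(ZkError::NodeExists) => {
            for child in zk.get_children("/auth", false)? {
                let (data, _stat) = zk.get_data(&format!("/auth/{child}"), false)?;
                import_key(&child, &data);
            }
        }
        Err(e) => return Err(e),
    }
    Ok(())
}

Because the watch is installed before the node is created or read, any key pushed between step 1 and step 2 is delivered twice at most, never zero times, which is what narrows the race described in the TODO above.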
|
||||
|
||||
/// Return `Ok(())` if the auth controller is able to access one of its database.
|
||||
@@ -144,24 +53,7 @@ impl AuthController {
|
||||
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
|
||||
match self.store.get_api_key(create_key.uid)? {
|
||||
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
|
||||
None => self.put_key(create_key.to_key()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_key(&self, key: Key) -> Result<Key> {
|
||||
let store = self.store.clone();
|
||||
match &self.zookeeper {
|
||||
Some(zookeeper) => {
|
||||
zookeeper.create(
|
||||
&format!("/auth/{}", key.uid),
|
||||
serde_json::to_vec_pretty(&key)?,
|
||||
Acl::open_unsafe().clone(),
|
||||
CreateMode::Persistent,
|
||||
)?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
None => store.put_api_key(key),
|
||||
None => self.store.put_api_key(create_key.to_key()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,20 +68,7 @@ impl AuthController {
|
||||
name => key.name = name.set(),
|
||||
};
|
||||
key.updated_at = OffsetDateTime::now_utc();
|
||||
let store = self.store.clone();
|
||||
// TODO: we may commit only after zk persisted the keys
|
||||
match &self.zookeeper {
|
||||
Some(zookeeper) => {
|
||||
zookeeper.set_data(
|
||||
&format!("/auth/{}", key.uid),
|
||||
serde_json::to_vec_pretty(&key)?,
|
||||
None,
|
||||
)?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
None => store.put_api_key(key),
|
||||
}
|
||||
self.store.put_api_key(key)
|
||||
}
|
||||
|
||||
pub fn get_key(&self, uid: Uuid) -> Result<Key> {
|
||||
@@ -231,19 +110,7 @@ impl AuthController {
|
||||
}
|
||||
|
||||
pub fn delete_key(&self, uid: Uuid) -> Result<()> {
|
||||
let deleted = match &self.zookeeper {
|
||||
Some(zookeeper) => match zookeeper.delete(&format!("/auth/{}", uid), None) {
|
||||
Ok(()) => true,
|
||||
Err(ZkError::NoNode) => false,
|
||||
Err(e) => return Err(e.into()),
|
||||
},
|
||||
None => {
|
||||
let store = self.store.clone();
|
||||
store.delete_api_key(uid)?
|
||||
}
|
||||
};
|
||||
|
||||
if deleted {
|
||||
if self.store.delete_api_key(uid)? {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
|
||||
@@ -292,7 +159,7 @@ impl AuthController {
|
||||
self.store.delete_all_keys()
|
||||
}
|
||||
|
||||
/// Insert a key in the DB without any check on its validity
|
||||
/// Delete all the keys in the DB.
|
||||
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
|
||||
self.store.put_api_key(key)?;
|
||||
Ok(())
|
||||
@@ -437,9 +304,10 @@ pub struct IndexSearchRules {
|
||||
pub filter: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
fn generate_default_keys(controller: &AuthController) -> Result<()> {
|
||||
controller.put_key(Key::default_admin())?;
|
||||
controller.put_key(Key::default_search())?;
|
||||
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
||||
store.put_api_key(Key::default_admin())?;
|
||||
store.put_api_key(Key::default_search())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -259,6 +259,9 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidState , Internal , INTERNAL_SERVER_ERROR ;
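The three new codes reach clients in the usual error envelope with a snake_case `code` and an `invalid_request` type. An illustrative payload for a rejected dictionary update; the `message` wording is assumed, the other fields follow the crate's existing convention:

fn example_error() -> serde_json::Value {
    // Illustrative only: the message text is an assumption, not the actual deserr output.
    serde_json::json!({
        "message": "Invalid value type at `.dictionary`: expected an array of strings.",
        "code": "invalid_settings_dictionary",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_settings_dictionary"
    })
}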
|
||||
|
||||
@@ -171,6 +171,15 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
|
||||
pub non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
|
||||
pub separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
|
||||
pub dictionary: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@@ -201,6 +210,9 @@ impl Settings<Checked> {
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
non_separator_tokens: Setting::Reset,
|
||||
separator_tokens: Setting::Reset,
|
||||
dictionary: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
@@ -217,6 +229,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@@ -232,6 +247,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@@ -274,6 +292,9 @@ impl Settings<Unchecked> {
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
non_separator_tokens: self.non_separator_tokens,
|
||||
separator_tokens: self.separator_tokens,
|
||||
dictionary: self.dictionary,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
@@ -335,6 +356,28 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.non_separator_tokens {
|
||||
Setting::Set(ref non_separator_tokens) => {
|
||||
builder.set_non_separator_tokens(non_separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_non_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.separator_tokens {
|
||||
Setting::Set(ref separator_tokens) => {
|
||||
builder.set_separator_tokens(separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.dictionary {
|
||||
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
|
||||
Setting::Reset => builder.reset_dictionary(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
@@ -459,15 +502,14 @@ pub fn settings(
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
|
||||
let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
|
||||
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli, each word in the synonyms map was split on its separators. Since we lost
// this information, we are going to put a space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
|
||||
.collect();
|
||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
@@ -520,6 +562,9 @@ pub fn settings(
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
non_separator_tokens: Setting::Set(non_separator_tokens),
|
||||
separator_tokens: Setting::Set(separator_tokens),
|
||||
dictionary: Setting::Set(dictionary),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
@@ -642,6 +687,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
@@ -663,6 +711,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
|
||||
@@ -80,7 +80,6 @@ reqwest = { version = "0.11.16", features = [
|
||||
], default-features = false }
|
||||
rustls = "0.20.8"
|
||||
rustls-pemfile = "1.0.2"
|
||||
rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] }
|
||||
segment = { version = "0.2.2", optional = true }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
@@ -106,7 +105,6 @@ walkdir = "2.3.3"
|
||||
yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.2.0"
|
||||
zookeeper = "0.8.0"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.8.0"
|
||||
|
||||
@@ -312,13 +312,6 @@ impl From<Opt> for Infos {
|
||||
config_file_path,
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics: _,
|
||||
zk_url: _,
|
||||
s3_url: _,
|
||||
s3_region: _,
|
||||
s3_bucket: _,
|
||||
s3_access_key: _,
|
||||
s3_secret_key: _,
|
||||
s3_security_token: _,
|
||||
} = options;
|
||||
|
||||
let schedule_snapshot = match schedule_snapshot {
|
||||
|
||||
@@ -39,9 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use s3::creds::Credentials;
|
||||
use s3::{Bucket, Region};
|
||||
use zookeeper::ZooKeeper;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
@@ -139,17 +136,14 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub async fn setup_meilisearch(
|
||||
opt: &Opt,
|
||||
zookeeper: Option<Arc<ZooKeeper>>,
|
||||
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
// the db is empty and the snapshot exists, import it
|
||||
if empty_db && snapshot_path_exists {
|
||||
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
|
||||
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?,
|
||||
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
@@ -166,14 +160,14 @@ pub async fn setup_meilisearch(
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
open_or_create_database(opt, empty_db)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
// the db is empty and the dump exists, import it
|
||||
if empty_db && src_path_exists {
|
||||
let (mut index_scheduler, mut auth_controller) =
|
||||
open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?;
|
||||
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
Err(e) => {
|
||||
@@ -193,10 +187,10 @@ pub async fn setup_meilisearch(
|
||||
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
||||
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
open_or_create_database(opt, empty_db)?
|
||||
}
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
open_or_create_database(opt, empty_db)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
@@ -205,12 +199,15 @@ pub async fn setup_meilisearch(
|
||||
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
thread::spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
});
|
||||
thread::Builder::new()
|
||||
.name(String::from("register-snapshot-tasks"))
|
||||
.spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Ok((index_scheduler, auth_controller))
|
||||
@@ -220,52 +217,34 @@ pub async fn setup_meilisearch(
|
||||
fn open_or_create_database_unchecked(
|
||||
opt: &Opt,
|
||||
on_failure: OnFailure,
|
||||
zookeeper: Option<Arc<ZooKeeper>>,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, zookeeper.clone());
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
|
||||
let instance_features = opt.to_instance_features();
|
||||
let index_scheduler = IndexScheduler::new(Arc::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into().map(Arc::new)?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
zookeeper: zookeeper.clone(),
|
||||
s3: opt.s3_url.as_ref().map(|url| {
|
||||
Arc::new(
|
||||
Bucket::new(
|
||||
opt.s3_bucket.as_deref().unwrap(),
|
||||
Region::Custom { region: opt.s3_region.clone(), endpoint: url.clone() },
|
||||
Credentials {
|
||||
access_key: opt.s3_access_key.clone(),
|
||||
secret_key: opt.s3_secret_key.clone(),
|
||||
security_token: opt.s3_security_token.clone(),
|
||||
session_token: None,
|
||||
expiration: None,
|
||||
},
|
||||
)
|
||||
.unwrap()
|
||||
.with_path_style(),
|
||||
)
|
||||
}),
|
||||
}))
|
||||
.map_err(anyhow::Error::from);
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
})?)
|
||||
};
|
||||
|
||||
match (
|
||||
index_scheduler,
|
||||
index_scheduler_builder(),
|
||||
auth_controller.map_err(anyhow::Error::from),
|
||||
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
||||
) {
|
||||
@@ -283,13 +262,12 @@ fn open_or_create_database_unchecked(
|
||||
fn open_or_create_database(
|
||||
opt: &Opt,
|
||||
empty_db: bool,
|
||||
zookeeper: Option<Arc<ZooKeeper>>,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
if !empty_db {
|
||||
check_version_file(&opt.db_path)?;
|
||||
}
|
||||
|
||||
open_or_create_database_unchecked(opt, OnFailure::KeepDb, zookeeper)
|
||||
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
@@ -299,7 +277,6 @@ fn import_dump(
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let reader = File::open(dump_path)?;
|
||||
let index_scheduler = index_scheduler.inner();
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::env;
|
||||
use std::io::{stderr, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_web::http::KeepAlive;
|
||||
use actix_web::web::Data;
|
||||
@@ -13,7 +12,6 @@ use meilisearch::analytics::Analytics;
|
||||
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use zookeeper::ZooKeeper;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
@@ -65,10 +63,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let timeout = Duration::from_millis(2500);
|
||||
let zookeeper =
|
||||
opt.zk_url.as_ref().map(|url| Arc::new(ZooKeeper::connect(url, timeout, drop).unwrap()));
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, zookeeper).await?;
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
let analytics = if !opt.no_analytics {
|
||||
|
||||
@@ -28,13 +28,6 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||
const MEILI_ENV: &str = "MEILI_ENV";
|
||||
const MEILI_ZK_URL: &str = "MEILI_ZK_URL";
|
||||
const MEILI_S3_URL: &str = "MEILI_S3_URL";
|
||||
const MEILI_S3_BUCKET: &str = "MEILI_S3_BUCKET";
|
||||
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
|
||||
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
|
||||
const MEILI_S3_SECURITY_TOKEN: &str = "MEILI_S3_SECURITY_TOKEN";
|
||||
const MEILI_S3_REGION: &str = "MEILI_S3_REGION";
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||
@@ -63,7 +56,6 @@ const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
const DEFAULT_ENV: &str = "development";
|
||||
const DEFAULT_S3_REGION: &str = "eu-central-1";
|
||||
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
|
||||
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
|
||||
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
|
||||
@@ -162,36 +154,6 @@ pub struct Opt {
|
||||
#[serde(default = "default_env")]
|
||||
pub env: String,
|
||||
|
||||
/// Sets the address and port used to communicate with the ZooKeeper cluster.
/// If run locally, the default URL is `http://localhost:2181/`.
|
||||
#[clap(long, env = MEILI_ZK_URL)]
|
||||
pub zk_url: Option<String>,
|
||||
|
||||
/// Sets the address and port used to communicate with the S3 bucket.
|
||||
#[clap(long, env = MEILI_S3_URL)]
|
||||
pub s3_url: Option<String>,
|
||||
|
||||
/// Sets the region used to communicate with the s3 bucket.
|
||||
#[clap(long, env = MEILI_S3_REGION, default_value_t = default_s3_region())]
|
||||
#[serde(default = "default_s3_region")]
|
||||
pub s3_region: String,
|
||||
|
||||
/// Sets the S3 bucket name to use.
|
||||
#[clap(long, env = MEILI_S3_BUCKET)]
|
||||
pub s3_bucket: Option<String>,
|
||||
|
||||
/// Set the S3 access key. If used you must also set the secret key.
|
||||
#[clap(long, env = MEILI_S3_ACCESS_KEY)]
|
||||
pub s3_access_key: Option<String>,
|
||||
|
||||
/// Set the S3 secret key. If used you must also set the access key.
|
||||
#[clap(long, env = MEILI_S3_SECRET_KEY)]
|
||||
pub s3_secret_key: Option<String>,
|
||||
|
||||
/// Sets the S3 security token; it can't be used together with the access key and secret key.
|
||||
#[clap(long, env = MEILI_S3_SECURITY_TOKEN)]
|
||||
pub s3_security_token: Option<String>,
|
||||
|
||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||
///
|
||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
@@ -406,13 +368,6 @@ impl Opt {
|
||||
http_addr,
|
||||
master_key,
|
||||
env,
|
||||
zk_url,
|
||||
s3_url,
|
||||
s3_region,
|
||||
s3_bucket,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
s3_security_token,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
@@ -446,25 +401,6 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_ENV, env);
|
||||
if let Some(zk_url) = zk_url {
|
||||
export_to_env_if_not_present(MEILI_ZK_URL, zk_url);
|
||||
}
|
||||
if let Some(s3_url) = s3_url {
|
||||
export_to_env_if_not_present(MEILI_S3_URL, s3_url);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_S3_REGION, s3_region);
|
||||
if let Some(s3_bucket) = s3_bucket {
|
||||
export_to_env_if_not_present(MEILI_S3_BUCKET, s3_bucket);
|
||||
}
|
||||
if let Some(s3_access_key) = s3_access_key {
|
||||
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
|
||||
}
|
||||
if let Some(s3_secret_key) = s3_secret_key {
|
||||
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
|
||||
}
|
||||
if let Some(s3_security_token) = s3_security_token {
|
||||
export_to_env_if_not_present(MEILI_S3_SECURITY_TOKEN, s3_security_token);
|
||||
}
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
{
|
||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||
@@ -611,7 +547,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
Ok(Self {
|
||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
|
||||
thread_pool: Some(Arc::new(thread_pool)),
|
||||
thread_pool: Some(thread_pool),
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
..Default::default()
|
||||
@@ -779,10 +715,6 @@ fn default_env() -> String {
|
||||
DEFAULT_ENV.to_string()
|
||||
}
|
||||
|
||||
fn default_s3_region() -> String {
|
||||
DEFAULT_S3_REGION.to_string()
|
||||
}
|
||||
|
||||
fn default_max_index_size() -> Byte {
|
||||
Byte::from_bytes(INDEX_SIZE)
|
||||
}
|
||||
|
||||
@@ -41,10 +41,14 @@ pub async fn create_api_key(
|
||||
_req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let v = body.into_inner();
|
||||
let key = auth_controller.create_key(v)?;
|
||||
let key = KeyView::from_key(key, &auth_controller);
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let key = auth_controller.create_key(v)?;
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Created().json(key))
|
||||
Ok(HttpResponse::Created().json(res))
|
||||
}
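The handlers below repeat this wrapping: the controller and scheduler calls are synchronous and may block, so they are moved onto tokio's blocking thread pool and the two failure layers (the task's `JoinError` and the controller's own error) are surfaced with `??`. A minimal generic sketch of the pattern, assuming an `anyhow`-style caller error:

// Hedged sketch of the `spawn_blocking(...).await??` pattern used in these handlers.
// The first `?` reports a panicked or cancelled blocking task (a `JoinError`),
// the second one reports the error returned by the closure itself.
async fn run_blocking<T, E, F>(f: F) -> anyhow::Result<T>
where
    F: FnOnce() -> Result<T, E> + Send + 'static,
    T: Send + 'static,
    E: std::error::Error + Send + Sync + 'static,
{
    Ok(tokio::task::spawn_blocking(f).await??)
}

In the actual routes the handler returns a `ResponseError`, so the `JoinError` is instead mapped explicitly with `ResponseError::from_msg(e.to_string(), Code::Internal)`, as the hunks below show.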
|
||||
|
||||
#[derive(Deserr, Debug, Clone, Copy)]
|
||||
@@ -106,11 +110,17 @@ pub async fn patch_api_key(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
let patch_api_key = body.into_inner();
|
||||
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.update_key(uid, patch_api_key)?;
|
||||
let key = KeyView::from_key(key, &auth_controller);
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.update_key(uid, patch_api_key)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(key))
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Ok().json(res))
|
||||
}
|
||||
|
||||
pub async fn delete_api_key(
|
||||
@@ -118,8 +128,13 @@ pub async fn delete_api_key(
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
auth_controller.delete_key(uid)?;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
auth_controller.delete_key(uid)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
||||
|
||||
@@ -29,7 +29,8 @@ pub async fn create_dump(
|
||||
keys: auth_controller.list_keys()?,
|
||||
instance_uid: analytics.instance_uid().cloned(),
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -78,6 +78,6 @@ async fn patch_features(
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
index_scheduler.inner().put_runtime_features(new_features)?;
|
||||
index_scheduler.put_runtime_features(new_features)?;
|
||||
Ok(HttpResponse::Ok().json(new_features))
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::io::{BufReader, ErrorKind, Seek, SeekFrom};
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
@@ -129,7 +129,8 @@ pub async fn delete_document(
|
||||
index_uid: index_uid.to_string(),
|
||||
documents_ids: vec![document_id],
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -395,12 +396,11 @@ async fn document_addition(
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.seek(SeekFrom::Start(0)).await {
|
||||
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
|
||||
}
|
||||
|
||||
let read_file = buffer.into_inner().into_std().await;
|
||||
let s3 = index_scheduler.s3.clone();
|
||||
let documents_count = tokio::task::spawn_blocking(move || {
|
||||
let documents_count = match format {
|
||||
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
|
||||
@@ -409,19 +409,8 @@ async fn document_addition(
|
||||
}
|
||||
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
|
||||
};
|
||||
|
||||
if let Some(s3) = s3 {
|
||||
update_file.seek(SeekFrom::Start(0)).unwrap();
|
||||
let mut reader = BufReader::new(&*update_file);
|
||||
match s3.put_object_stream(&mut reader, format!("/update-files/{}", uuid)) {
|
||||
Ok(_) | Err(s3::error::S3Error::Http(_, _)) => (),
|
||||
Err(e) => panic!("Error {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
// we NEED to persist the file here because we moved the `update_file` into another task.
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(documents_count)
|
||||
})
|
||||
.await;
|
||||
@@ -456,7 +445,7 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match scheduler.register(task) {
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
index_scheduler.delete_update_file(uuid)?;
|
||||
@@ -487,7 +476,8 @@ pub async fn delete_documents_batch(
|
||||
|
||||
let task =
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -522,7 +512,8 @@ pub async fn delete_documents_by_filter(
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -538,7 +529,8 @@ pub async fn clear_all_documents(
|
||||
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
|
||||
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -135,7 +135,8 @@ pub async fn create_index(
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
@@ -202,7 +203,8 @@ pub async fn update_index(
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -214,7 +216,8 @@ pub async fn delete_index(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -55,7 +55,10 @@ macro_rules! make_setting_route {
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -94,7 +97,10 @@ macro_rules! make_setting_route {
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -304,6 +310,81 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/non-separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
|
||||
>,
|
||||
non_separator_tokens,
|
||||
"nonSeparatorTokens",
|
||||
analytics,
|
||||
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"nonSeparatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"non_separator_tokens": {
|
||||
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
|
||||
>,
|
||||
separator_tokens,
|
||||
"separatorTokens",
|
||||
analytics,
|
||||
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"separatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"separator_tokens": {
|
||||
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/dictionary",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
|
||||
>,
|
||||
dictionary,
|
||||
"dictionary",
|
||||
analytics,
|
||||
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"dictionary Updated".to_string(),
|
||||
json!({
|
||||
"dictionary": {
|
||||
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/synonyms",
|
||||
put,
|
||||
@@ -460,6 +541,9 @@ generate_configure!(
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
@@ -580,7 +664,8 @@ pub async fn update_all(
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -615,7 +700,8 @@ pub async fn delete_all(
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -18,6 +18,7 @@ use serde_json::json;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::macros::format_description;
|
||||
use time::{Date, Duration, OffsetDateTime, Time};
|
||||
use tokio::task;
|
||||
|
||||
use super::SummarizedTaskView;
|
||||
use crate::analytics::Analytics;
|
||||
@@ -324,12 +325,15 @@ async fn cancel_tasks(
|
||||
|
||||
let query = params.into_query();
|
||||
|
||||
let (tasks, _) =
|
||||
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
|
||||
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
|
||||
&index_scheduler.read_txn()?,
|
||||
&query,
|
||||
index_scheduler.filters(),
|
||||
)?;
|
||||
let task_cancelation =
|
||||
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = index_scheduler.register(task_cancelation)?;
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
@@ -366,12 +370,15 @@ async fn delete_tasks(
|
||||
);
|
||||
let query = params.into_query();
|
||||
|
||||
let (tasks, _) =
|
||||
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
|
||||
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
|
||||
&index_scheduler.read_txn()?,
|
||||
&query,
|
||||
index_scheduler.filters(),
|
||||
)?;
|
||||
let task_deletion =
|
||||
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = index_scheduler.register(task_deletion)?;
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
|
||||
@@ -491,6 +491,20 @@ pub fn perform_search(
|
||||
tokenizer_builder.allow_list(&script_lang_map);
|
||||
}
|
||||
|
||||
let separators = index.allowed_separators(&rtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
tokenizer_builder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
tokenizer_builder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
||||
formatter_builder.crop_marker(query.crop_marker);
|
||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||
|
||||
@@ -39,7 +39,7 @@ impl Server {
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir) }
|
||||
@@ -54,7 +54,7 @@ impl Server {
|
||||
|
||||
options.master_key = Some("MASTER_KEY".to_string());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir) }
|
||||
@@ -67,7 +67,7 @@ impl Server {
|
||||
}
|
||||
|
||||
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await?;
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options)?;
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Ok(Server { service, _dir: None })
|
||||
|
||||
File diff suppressed because it is too large
@@ -16,6 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
|
||||
);
|
||||
map.insert("stop_words", json!([]));
|
||||
map.insert("non_separator_tokens", json!([]));
|
||||
map.insert("separator_tokens", json!([]));
|
||||
map.insert("dictionary", json!([]));
|
||||
map.insert("synonyms", json!({}));
|
||||
map.insert(
|
||||
"faceting",
|
||||
@@ -51,7 +54,7 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 11);
|
||||
assert_eq!(settings.keys().len(), 14);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
@@ -62,6 +65,9 @@ async fn get_settings() {
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
|
||||
);
|
||||
assert_eq!(settings["stopWords"], json!([]));
|
||||
assert_eq!(settings["nonSeparatorTokens"], json!([]));
|
||||
assert_eq!(settings["separatorTokens"], json!([]));
|
||||
assert_eq!(settings["dictionary"], json!([]));
|
||||
assert_eq!(
|
||||
settings["faceting"],
|
||||
json!({
|
||||
@@ -272,6 +278,9 @@ test_setting_routes!(
|
||||
searchable_attributes put,
|
||||
distinct_attribute put,
|
||||
stop_words put,
|
||||
separator_tokens put,
|
||||
non_separator_tokens put,
|
||||
dictionary put,
|
||||
ranking_rules put,
|
||||
synonyms put,
|
||||
pagination patch,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod tokenizer_customization;
|
||||
|
||||
467
meilisearch/tests/settings/tokenizer_customization.rs
Normal file
@@ -0,0 +1,467 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::common::Server;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["&sep", "<br/>"],
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
|
||||
[
|
||||
"#",
|
||||
"&"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["separatorTokens"]), @r###"
|
||||
[
|
||||
"&sep",
|
||||
"<br/>"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["dictionary"]), @r###"
|
||||
[
|
||||
"J. R. R.",
|
||||
"J.R.R."
|
||||
]
|
||||
"###);
|
||||
|
||||
index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": null,
|
||||
"separatorTokens": null,
|
||||
"dictionary": null,
|
||||
}))
|
||||
.await;
|
||||
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["separatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["dictionary"]), @"[]");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_search() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["<br/>", "&sep"],
|
||||
"dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "Mac <em>&</em> cheese"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "Mac&sep<em>&</em>&sepcheese"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn advanced_synergies() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
"synonyms": {
|
||||
"J.R.R.": ["jrr", "J. R. R."],
|
||||
"J. R. R.": ["jrr", "J.R.R."],
|
||||
"jrr": ["J.R.R.", "J. R. R."],
|
||||
"J.K.": ["jk", "J. K."],
|
||||
"J. K.": ["jk", "J.K."],
|
||||
"jk": ["J.K.", "J. K."],
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// Only update the dictionary; the synonyms should be recomputed.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R.</em> R. Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
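
// A minimal, self-contained sketch of the behaviour the test above exercises; it is not part
// of the diff. Once a compound such as "J. R. R." is declared in the user dictionary, the
// tokenizer keeps it as a single word token instead of splitting it on dots and spaces. It
// assumes charabia's `TokenizerBuilder`, `words_dict`, `is_word` and `lemma`, the same APIs
// the changes below wire into indexing and search.
fn dictionary_sketch() {
    use charabia::TokenizerBuilder;

    let dictionary = ["J. R. R.", "J.R.R."];
    let mut builder = TokenizerBuilder::new();
    builder.words_dict(&dictionary);
    let tokenizer = builder.build();

    let words: Vec<String> = tokenizer
        .tokenize("J. R. R. Tolkien")
        .filter(|token| token.is_word())
        .map(|token| token.lemma().to_string())
        .collect();
    // Expected shape (assumption): the whole acronym surfaces as one entry next to "tolkien".
    println!("{words:?}");
}
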
@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::mem::size_of;
use std::path::Path;
@@ -61,8 +61,12 @@ pub mod main_key {
    pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
    pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
    pub const STOP_WORDS_KEY: &str = "stop-words";
    pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
    pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
    pub const DICTIONARY_KEY: &str = "dictionary";
    pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
    pub const SYNONYMS_KEY: &str = "synonyms";
    pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
    pub const WORDS_FST_KEY: &str = "words-fst";
    pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
    pub const CREATED_AT_KEY: &str = "created-at";
@@ -1055,18 +1059,116 @@ impl Index {
        }
    }

    /* non separator tokens */

    pub(crate) fn put_non_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
    }

    pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
    }

    pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
            rtxn,
            main_key::NON_SEPARATOR_TOKENS_KEY,
        )?)
    }

    /* separator tokens */

    pub(crate) fn put_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
    }

    pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
    }

    pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        Ok(self
            .main
            .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
    }

    /* separators easing method */

    pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        let default_separators =
            charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
        let mut separators: Option<BTreeSet<_>> = None;
        if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
            separator_tokens.extend(default_separators.clone());
            separators = Some(separator_tokens);
        }

        if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
            separators = separators
                .or_else(|| Some(default_separators.collect()))
                .map(|separators| &separators - &non_separator_tokens);
        }

        Ok(separators)
    }
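
// A minimal sketch, with made-up values, of the merge rule `allowed_separators` implements
// above: user separator tokens extend charabia's defaults, then non-separator tokens are
// subtracted from the result.
fn allowed_separators_sketch() {
    use std::collections::BTreeSet;

    let defaults: BTreeSet<String> = [" ", "-", "@"].iter().map(|s| s.to_string()).collect();
    let separator_tokens: BTreeSet<String> = BTreeSet::from(["#".to_string()]);
    let non_separator_tokens: BTreeSet<String> = BTreeSet::from(["@".to_string()]);

    // Extend the defaults with the user separators, then remove the non-separators.
    let mut separators = defaults;
    separators.extend(separator_tokens);
    let separators = &separators - &non_separator_tokens;

    assert!(separators.contains("#"));
    assert!(!separators.contains("@"));
}
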
    /* dictionary */

    pub(crate) fn put_dictionary(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
    }

    pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
    }

    pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        Ok(self
            .main
            .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
    }

    /* synonyms */

    pub(crate) fn put_synonyms(
        &self,
        wtxn: &mut RwTxn,
        synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
        user_defined_synonyms: &BTreeMap<String, Vec<String>>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
        self.main.put::<_, Str, SerdeBincode<_>>(
            wtxn,
            main_key::USER_DEFINED_SYNONYMS_KEY,
            user_defined_synonyms,
        )
    }

    pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
        self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
        self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
    }

    pub fn user_defined_synonyms(
        &self,
        rtxn: &RoTxn,
    ) -> heed::Result<BTreeMap<String, Vec<String>>> {
        Ok(self
            .main
            .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
            .unwrap_or_default())
    }

    pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
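
// A short sketch, with illustrative values only, of the two shapes `put_synonyms` stores
// above: the user-defined map keeps exactly what the caller sent, while the resolved map is
// keyed and valued by normalized token sequences so multi-word synonyms can be matched.
fn synonyms_shapes_sketch() {
    use std::collections::{BTreeMap, HashMap};

    // What the caller sets and `user_defined_synonyms` returns.
    let user_defined: BTreeMap<String, Vec<String>> =
        BTreeMap::from([("J.R.R.".to_string(), vec!["J. R. R.".to_string()])]);

    // What `synonyms` returns once both sides are tokenized and normalized
    // (the exact token split depends on the dictionary and separator settings).
    let resolved: HashMap<Vec<String>, Vec<Vec<String>>> = HashMap::from([(
        vec!["j.r.r.".to_string()],
        vec![vec!["j".to_string(), "r".to_string(), "r".to_string()]],
    )]);

    assert_eq!(user_defined.len(), 1);
    assert_eq!(resolved.len(), 1);
}
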
@@ -488,6 +488,20 @@ pub fn execute_search(
        tokbuilder.stop_words(stop_words);
    }

    let separators = ctx.index.allowed_separators(ctx.txn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokbuilder.separators(separators);
    }

    let dictionary = ctx.index.dictionary(ctx.txn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokbuilder.words_dict(dictionary);
    }

    let script_lang_map = ctx.index.script_language(ctx.txn)?;
    if !script_lang_map.is_empty() {
        tokbuilder.allow_list(&script_lang_map);
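
// A small standalone sketch, with a stand-in builder, of the borrowing pattern the hunk
// above repeats: settings are stored as owned `BTreeSet<String>`, but charabia wants
// `&[&str]`, so each list is first re-borrowed into an `Option<Vec<&str>>` that lives as
// long as the owned data it points into.
fn as_str_borrow_sketch() {
    use std::collections::BTreeSet;

    struct StandInBuilder;
    impl StandInBuilder {
        fn separators(&mut self, tokens: &[&str]) {
            println!("separators: {tokens:?}");
        }
    }

    let owned: Option<BTreeSet<String>> =
        Some(BTreeSet::from(["-".to_string(), "#".to_string()]));
    let borrowed: Option<Vec<&str>> =
        owned.as_ref().map(|x| x.iter().map(String::as_str).collect());

    let mut builder = StandInBuilder;
    if let Some(ref separators) = borrowed {
        builder.separators(separators);
    }
}
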
@@ -2,7 +2,7 @@ use std::io::Cursor;

use big_s::S;
use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use maplit::{btreemap, hashset};

use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
@@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
    builder.set_synonyms(hashmap! {
    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],

@@ -15,7 +15,7 @@ they store fewer proximities than the regular word proximity DB.

*/

use std::collections::HashMap;
use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
@@ -336,7 +336,7 @@ fn test_proximity_split_word() {

    index
        .update_settings(|s| {
            let mut syns = HashMap::new();
            let mut syns = BTreeMap::new();
            syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
            s.set_synonyms(syns);
        })

@@ -18,7 +18,7 @@ if `words` doesn't exist before it.
14. Synonyms cost nothing according to the typo ranking rule
*/

use std::collections::HashMap;
use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
@@ -591,7 +591,7 @@ fn test_typo_synonyms() {
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);

            let mut synonyms = HashMap::new();
            let mut synonyms = BTreeMap::new();
            synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
            synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);

@@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    searchable_fields: &Option<HashSet<FieldId>>,
    stop_words: Option<&fst::Set<&[u8]>>,
    allowed_separators: Option<&Vec<&str>>,
    dictionary: Option<&Vec<&str>>,
    max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
    puffin::profile_function!();
@@ -52,6 +54,14 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
    }
    if let Some(dictionary) = dictionary {
        // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect();
        tokenizer_builder.words_dict(dictionary.as_slice());
    }
    if let Some(separators) = allowed_separators {
        // let separators: Vec<_> = separators.iter().map(String::as_str).collect();
        tokenizer_builder.separators(separators.as_slice());
    }
    let tokenizer = tokenizer_builder.build();

    let mut cursor = obkv_documents.into_cursor()?;

@@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents(
    geo_fields_ids: Option<(FieldId, FieldId)>,
    vectors_field_id: Option<FieldId>,
    stop_words: Option<fst::Set<&[u8]>>,
    allowed_separators: Option<Vec<&str>>,
    dictionary: Option<Vec<&str>>,
    max_positions_per_attributes: Option<u32>,
    exact_attributes: HashSet<FieldId>,
) -> Result<()> {
@@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents(
                geo_fields_ids,
                vectors_field_id,
                &stop_words,
                &allowed_separators,
                &dictionary,
                max_positions_per_attributes,
            )
        })
@@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data(
    geo_fields_ids: Option<(FieldId, FieldId)>,
    vectors_field_id: Option<FieldId>,
    stop_words: &Option<fst::Set<&[u8]>>,
    allowed_separators: &Option<Vec<&str>>,
    dictionary: &Option<Vec<&str>>,
    max_positions_per_attributes: Option<u32>,
) -> Result<(
    grenad::Reader<CursorClonableMmap>,
@@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data(
                indexer,
                searchable_fields,
                stop_words.as_ref(),
                allowed_separators.as_ref(),
                dictionary.as_ref(),
                max_positions_per_attributes,
            )?;

@@ -316,6 +316,12 @@ where
        let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");

        let stop_words = self.index.stop_words(self.wtxn)?;
        let separators = self.index.allowed_separators(self.wtxn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let dictionary = self.index.dictionary(self.wtxn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;

        let pool_params = GrenadParameters {
@@ -353,6 +359,8 @@ where
            geo_fields_ids,
            vectors_field_id,
            stop_words,
            separators,
            dictionary,
            max_positions_per_attributes,
            exact_attributes,
        )

@@ -1,5 +1,3 @@
use std::sync::Arc;

use grenad::CompressionType;
use rayon::ThreadPool;

@@ -11,7 +9,7 @@ pub struct IndexerConfig {
    pub max_memory: Option<usize>,
    pub chunk_compression_type: CompressionType,
    pub chunk_compression_level: Option<u32>,
    pub thread_pool: Option<Arc<ThreadPool>>,
    pub thread_pool: Option<ThreadPool>,
    pub max_positions_per_attributes: Option<u32>,
    pub skip_index_budget: bool,
}
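
// A brief sketch (thread count and the final assignment are assumptions) of what the field
// change above means for callers: `IndexerConfig` now owns its rayon pool directly instead
// of sharing it behind an `Arc`, so the pool is built once and moved into the config.
fn thread_pool_sketch() -> Result<(), rayon::ThreadPoolBuildError> {
    use rayon::ThreadPoolBuilder;

    let pool = ThreadPoolBuilder::new().num_threads(8).build()?;
    // e.g. `config.thread_pool = Some(pool);` on the struct shown above.
    drop(pool);
    Ok(())
}
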
@@ -1,4 +1,4 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;

use charabia::{Normalize, Tokenizer, TokenizerBuilder};
@@ -112,8 +112,11 @@ pub struct Settings<'a, 't, 'u, 'i> {
    sortable_fields: Setting<HashSet<String>>,
    criteria: Setting<Vec<Criterion>>,
    stop_words: Setting<BTreeSet<String>>,
    non_separator_tokens: Setting<BTreeSet<String>>,
    separator_tokens: Setting<BTreeSet<String>>,
    dictionary: Setting<BTreeSet<String>>,
    distinct_field: Setting<String>,
    synonyms: Setting<HashMap<String, Vec<String>>>,
    synonyms: Setting<BTreeMap<String, Vec<String>>>,
    primary_key: Setting<String>,
    authorize_typos: Setting<bool>,
    min_word_len_two_typos: Setting<u8>,
@@ -141,6 +144,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            sortable_fields: Setting::NotSet,
            criteria: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            distinct_field: Setting::NotSet,
            synonyms: Setting::NotSet,
            primary_key: Setting::NotSet,
@@ -205,6 +211,39 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
    }

    pub fn reset_non_separator_tokens(&mut self) {
        self.non_separator_tokens = Setting::Reset;
    }

    pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
        self.non_separator_tokens = if non_separator_tokens.is_empty() {
            Setting::Reset
        } else {
            Setting::Set(non_separator_tokens)
        }
    }

    pub fn reset_separator_tokens(&mut self) {
        self.separator_tokens = Setting::Reset;
    }

    pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
        self.separator_tokens = if separator_tokens.is_empty() {
            Setting::Reset
        } else {
            Setting::Set(separator_tokens)
        }
    }

    pub fn reset_dictionary(&mut self) {
        self.dictionary = Setting::Reset;
    }

    pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
        self.dictionary =
            if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
    }

    pub fn reset_distinct_field(&mut self) {
        self.distinct_field = Setting::Reset;
    }
@@ -217,7 +256,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        self.synonyms = Setting::Reset;
    }

    pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
    pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
        self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
    }

@@ -452,9 +491,84 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        }
    }

    fn update_non_separator_tokens(&mut self) -> Result<bool> {
        let changes = match self.non_separator_tokens {
            Setting::Set(ref non_separator_tokens) => {
                let current = self.index.non_separator_tokens(self.wtxn)?;

                // Does the new list differ from the previous one?
                if current.map_or(true, |current| &current != non_separator_tokens) {
                    self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
                    true
                } else {
                    false
                }
            }
            Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
            Setting::NotSet => false,
        };

        // the synonyms must be updated if non separator tokens have been updated.
        if changes && self.synonyms == Setting::NotSet {
            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
        }

        Ok(changes)
    }

    fn update_separator_tokens(&mut self) -> Result<bool> {
        let changes = match self.separator_tokens {
            Setting::Set(ref separator_tokens) => {
                let current = self.index.separator_tokens(self.wtxn)?;

                // Does the new list differ from the previous one?
                if current.map_or(true, |current| &current != separator_tokens) {
                    self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
                    true
                } else {
                    false
                }
            }
            Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
            Setting::NotSet => false,
        };

        // the synonyms must be updated if separator tokens have been updated.
        if changes && self.synonyms == Setting::NotSet {
            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
        }

        Ok(changes)
    }

    fn update_dictionary(&mut self) -> Result<bool> {
        let changes = match self.dictionary {
            Setting::Set(ref dictionary) => {
                let current = self.index.dictionary(self.wtxn)?;

                // Does the new list differ from the previous one?
                if current.map_or(true, |current| &current != dictionary) {
                    self.index.put_dictionary(self.wtxn, dictionary)?;
                    true
                } else {
                    false
                }
            }
            Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
            Setting::NotSet => false,
        };

        // the synonyms must be updated if dictionary has been updated.
        if changes && self.synonyms == Setting::NotSet {
            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
        }

        Ok(changes)
    }
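
// A condensed sketch of the three-state setting these update functions pattern-match on
// (the real `Setting` lives in milli's update module; the shape is assumed here): `NotSet`
// leaves the stored value alone, `Reset` deletes it, and `Set` overwrites it. That is why a
// changed dictionary or separator list force-sets `synonyms` back to the stored user-defined
// value, so the synonyms are re-normalized with the new tokenizer configuration.
#[derive(Debug, PartialEq, Eq)]
enum SettingSketch<T> {
    Set(T),
    Reset,
    NotSet,
}

fn setting_sketch() {
    let dictionary: SettingSketch<Vec<String>> = SettingSketch::Set(vec!["J. R. R.".to_string()]);
    assert_ne!(dictionary, SettingSketch::NotSet);
}
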
    fn update_synonyms(&mut self) -> Result<bool> {
        match self.synonyms {
            Setting::Set(ref synonyms) => {
            Setting::Set(ref user_synonyms) => {
                fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
                    tokenizer
                        .tokenize(text)
@@ -473,10 +587,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                if let Some(ref stop_words) = stop_words {
                    builder.stop_words(stop_words);
                }

                let separators = self.index.allowed_separators(self.wtxn)?;
                let separators: Option<Vec<_>> =
                    separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
                if let Some(ref separators) = separators {
                    builder.separators(separators);
                }

                let dictionary = self.index.dictionary(self.wtxn)?;
                let dictionary: Option<Vec<_>> =
                    dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
                if let Some(ref dictionary) = dictionary {
                    builder.words_dict(dictionary);
                }

                let tokenizer = builder.build();

                let mut new_synonyms = HashMap::new();
                for (word, synonyms) in synonyms {
                for (word, synonyms) in user_synonyms {
                    // Normalize both the word and associated synonyms.
                    let normalized_word = normalize(&tokenizer, word);
                    let normalized_synonyms =
@@ -497,7 +626,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                let old_synonyms = self.index.synonyms(self.wtxn)?;

                if new_synonyms != old_synonyms {
                    self.index.put_synonyms(self.wtxn, &new_synonyms)?;
                    self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
                    Ok(true)
                } else {
                    Ok(false)
@@ -757,11 +886,17 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        let faceted_updated = old_faceted_fields != new_faceted_fields;

        let stop_words_updated = self.update_stop_words()?;
        let non_separator_tokens_updated = self.update_non_separator_tokens()?;
        let separator_tokens_updated = self.update_separator_tokens()?;
        let dictionary_updated = self.update_dictionary()?;
        let synonyms_updated = self.update_synonyms()?;
        let searchable_updated = self.update_searchable()?;
        let exact_attributes_updated = self.update_exact_attributes()?;

        if stop_words_updated
            || non_separator_tokens_updated
            || separator_tokens_updated
            || dictionary_updated
            || faceted_updated
            || synonyms_updated
            || searchable_updated
@@ -778,7 +913,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
mod tests {
    use big_s::S;
    use heed::types::ByteSlice;
    use maplit::{btreeset, hashmap, hashset};
    use maplit::{btreemap, btreeset, hashset};

    use super::*;
    use crate::error::Error;
@@ -1244,7 +1379,7 @@ mod tests {
        // In the same transaction provide some synonyms
        index
            .update_settings_using_wtxn(&mut wtxn, |settings| {
                settings.set_synonyms(hashmap! {
                settings.set_synonyms(btreemap! {
                    "blini".to_string() => vec!["crepes".to_string()],
                    "super like".to_string() => vec!["love".to_string()],
                    "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
@@ -1540,6 +1675,9 @@ mod tests {
            sortable_fields,
            criteria,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            distinct_field,
            synonyms,
            primary_key,
@@ -1558,6 +1696,9 @@ mod tests {
        assert!(matches!(sortable_fields, Setting::NotSet));
        assert!(matches!(criteria, Setting::NotSet));
        assert!(matches!(stop_words, Setting::NotSet));
        assert!(matches!(non_separator_tokens, Setting::NotSet));
        assert!(matches!(separator_tokens, Setting::NotSet));
        assert!(matches!(dictionary, Setting::NotSet));
        assert!(matches!(distinct_field, Setting::NotSet));
        assert!(matches!(synonyms, Setting::NotSet));
        assert!(matches!(primary_key, Setting::NotSet));

@@ -5,7 +5,7 @@ use std::io::Cursor;
use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use maplit::{btreemap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
@@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
    builder.set_synonyms(hashmap! {
    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
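
// A last sketch, reusing values from the fixture above, of the practical difference behind
// the `hashmap!` -> `btreemap!` switch in these tests: `set_synonyms` now takes a `BTreeMap`,
// which iterates in key order, so the normalized synonyms and settings dumps come out in a
// stable order (the ordering rationale is an inference; the signature change is what the
// tests have to follow).
fn btreemap_order_sketch() {
    use maplit::btreemap;

    let synonyms = btreemap! {
        "world".to_string() => vec!["earth".to_string()],
        "hello".to_string() => vec!["good morning".to_string()],
    };
    let keys: Vec<_> = synonyms.keys().cloned().collect();
    assert_eq!(keys, vec!["hello".to_string(), "world".to_string()]);
}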