Mirror of https://github.com/meilisearch/meilisearch.git
Synced 2025-12-08 21:55:42 +00:00

Compare commits: 36 commits, setup-sent… to prototype-…
| SHA1 |
|---|
| b126bf3aec |
| e82ff56416 |
| 1b26dde438 |
| 3bb644b54d |
| 34b9145db2 |
| 76c05d1b20 |
| 34fac115d5 |
| a09686fcbd |
| 393be40179 |
| 487d493f49 |
| 9258e5b5bf |
| 462b4654c4 |
| abfa7ded25 |
| f2837aaec2 |
| 11df155598 |
| 651657c03e |
| b9ad59c969 |
| 66aa682e23 |
| 256cf33bca |
| 9945cbf9db |
| 03d0f628bd |
| ea78060916 |
| b42d48187a |
| 679c0b0f97 |
| e02d0064bd |
| 7ef3572f11 |
| 93285041a9 |
| dc3d9c90d9 |
| 287cf25d39 |
| 66aa6d5871 |
| 8ac5b765bc |
| cea93e9a37 |
| 085aad0a94 |
| 6db80b0836 |
| 8dc5acf998 |
| fc2590fc9d |
1 change: .github/workflows/publish-apt-brew-pkg.yml (vendored)

@@ -53,5 +53,6 @@ jobs:
       uses: mislav/bump-homebrew-formula-action@v2
       with:
         formula-name: meilisearch
+        formula-path: Formula/m/meilisearch.rb
       env:
         COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

255 changes: Cargo.lock (generated)

@@ -700,8 +700,7 @@ dependencies = [
 [[package]]
 name = "charabia"
 version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "098219a776307414866165a03a9cc68c1578764fe3616fe979e1c280790ddd73"
+source = "git+https://github.com/meilisearch/charabia?branch=main#5c3d09a7127dcf5e0e5d94d991c4d3d5ef4768cc"
 dependencies = [
  "aho-corasick",
  "cow-utils",

@@ -1073,16 +1072,6 @@ dependencies = [
  "syn 1.0.109",
 ]
 
-[[package]]
-name = "debugid"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
-dependencies = [
- "serde",
- "uuid 1.4.1",
-]
-
 [[package]]
 name = "deranged"
 version = "0.3.7"

@@ -1454,18 +1443,7 @@ dependencies = [
  "insta",
  "nom",
  "nom_locate",
+ "unescaper",
 ]
 
-[[package]]
-name = "findshlibs"
-version = "0.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
-dependencies = [
- "cc",
- "lazy_static",
- "libc",
- "winapi",
-]
-
 [[package]]

@@ -1816,17 +1794,6 @@ dependencies = [
  "digest",
 ]
 
-[[package]]
-name = "hostname"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
-dependencies = [
- "libc",
- "match_cfg",
- "winapi",
-]
-
 [[package]]
 name = "http"
 version = "0.2.9"

@@ -2209,9 +2176,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8"
+checksum = "6f567a47e47b5420908424de2c6c5e424e3cafe588d0146bd128c0f3755758a3"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2228,9 +2195,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-compress"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59"
+checksum = "49f3e553d55ebe9881fa5e5de588b0a153456e93564d17dfbef498912caf63a2"
 dependencies = [
  "anyhow",
  "flate2",

@@ -2239,9 +2206,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-core"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea"
+checksum = "a9a2440cc156a4a911a174ec68203543d1efb10df3a700a59b6bf581e453c726"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2256,9 +2223,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-decompress"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1"
+checksum = "e077a410e61c962cb526f71b7effd62ffc607488a8f61869c937582d2ccb529b"
 dependencies = [
  "anyhow",
  "flate2",

@@ -2267,9 +2234,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-dictionary"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c"
+checksum = "d9f57491adf7b311a3ee87f5e4a36454df16a2ec73de4ef28b2106fac80bd782"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2287,9 +2254,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6"
+checksum = "a3476ec7748aebd2eb23d496ddfce5e7e0a5c031cffcd214451043e02d029f11"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2308,9 +2275,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ipadic-neologd-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a"
+checksum = "7b1c7576a02d5e4af2bf62de51790a01bc4b8bc0d0b6a6b86a46b157f5cb306d"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2329,9 +2296,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18"
+checksum = "b713ecd5b827d7d448c3c5eb3c6d5899ecaf22cd17087599996349a02c76828d"
 dependencies = [
  "bincode",
  "byteorder",

@@ -2346,9 +2313,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8"
+checksum = "3e545752f6487be87b572529ad594cb3b48d2ef20821516f598b2d152d23277b"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2366,9 +2333,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-tokenizer"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81"
+checksum = "24a2d4606a5a4da62ac4a3680ee884a75da7f0c892dc967fc9cb983ceba39a8f"
 dependencies = [
  "bincode",
  "byteorder",

@@ -2381,9 +2348,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-unidic"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816"
+checksum = "388b1bdf81794b5d5b8057ce0321c58ff4b90d676b637948ccc7863ae2f43d28"
 dependencies = [
  "bincode",
  "byteorder",

@@ -2398,9 +2365,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-unidic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55"
+checksum = "cdfa3e29a22c047da57fadd960ff674b720de15a1e2fb35b5ed67f3408afb469"
 dependencies = [
  "anyhow",
  "bincode",

@@ -2524,12 +2491,6 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
-[[package]]
-name = "match_cfg"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
-
 [[package]]
 name = "md5"
 version = "0.7.0"

@@ -2610,8 +2571,6 @@ dependencies = [
  "rustls 0.20.8",
  "rustls-pemfile",
  "segment",
- "sentry",
- "sentry-actix",
  "serde",
  "serde_json",
  "serde_urlencoded",

@@ -2931,17 +2890,6 @@ dependencies = [
  "num-traits",
 ]
 
-[[package]]
-name = "os_info"
-version = "3.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e"
-dependencies = [
- "log",
- "serde",
- "winapi",
-]
-
 [[package]]
 name = "page_size"
 version = "0.4.2"

@@ -3664,126 +3612,6 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
 
-[[package]]
-name = "sentry"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e95efd0cefa32028cdb9766c96de71d96671072f9fb494dc9fb84c0ef93e52b"
-dependencies = [
- "httpdate",
- "reqwest",
- "rustls 0.21.6",
- "sentry-backtrace",
- "sentry-contexts",
- "sentry-core",
- "sentry-debug-images",
- "sentry-panic",
- "sentry-tracing",
- "tokio",
- "ureq",
- "webpki-roots 0.25.2",
-]
-
-[[package]]
-name = "sentry-actix"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "795851be3047d9be16ea8a808383f89e75d2aebc9b53d25fe708c27bc56b4488"
-dependencies = [
- "actix-web",
- "futures-util",
- "sentry-core",
-]
-
-[[package]]
-name = "sentry-backtrace"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ac2bac6f310c4c4c4bb094d1541d32ae497f8c5c23405e85492cefdfe0971a9"
-dependencies = [
- "backtrace",
- "once_cell",
- "regex",
- "sentry-core",
-]
-
-[[package]]
-name = "sentry-contexts"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c3e17295cecdbacf66c5bd38d6e1147e09e1e9d824d2d5341f76638eda02a3a"
-dependencies = [
- "hostname",
- "libc",
- "os_info",
- "rustc_version",
- "sentry-core",
- "uname",
-]
-
-[[package]]
-name = "sentry-core"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8339474f587f36cb110fa1ed1b64229eea6d47b0b886375579297b7e47aeb055"
-dependencies = [
- "once_cell",
- "rand",
- "sentry-types",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "sentry-debug-images"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c11e7d2b809b06497a18a2e60f513206462ae2db27081dfb7be9ade1f329cc8"
-dependencies = [
- "findshlibs",
- "once_cell",
- "sentry-core",
-]
-
-[[package]]
-name = "sentry-panic"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "875b69f506da75bd664029eafb05f8934297d2990192896d17325f066bd665b7"
-dependencies = [
- "sentry-backtrace",
- "sentry-core",
-]
-
-[[package]]
-name = "sentry-tracing"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89feead9bdd116f8035e89567651340fc382db29240b6c55ef412078b08d1aa3"
-dependencies = [
- "sentry-backtrace",
- "sentry-core",
- "tracing-core",
- "tracing-subscriber",
-]
-
-[[package]]
-name = "sentry-types"
-version = "0.31.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd"
-dependencies = [
- "debugid",
- "getrandom",
- "hex",
- "serde",
- "serde_json",
- "thiserror",
- "time",
- "url",
- "uuid 1.4.1",
-]
-
 [[package]]
 name = "serde"
 version = "1.0.183"

@@ -4332,16 +4160,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
 dependencies = [
  "once_cell",
- "valuable",
 ]
 
-[[package]]
-name = "tracing-subscriber"
-version = "0.3.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
-dependencies = [
- "tracing-core",
-]
-
 [[package]]

@@ -4363,12 +4181,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
 
 [[package]]
-name = "uname"
-version = "0.1.1"
+name = "unescaper"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8"
+checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
 dependencies = [
- "libc",
+ "thiserror",
 ]
 
 [[package]]

@@ -4443,7 +4261,6 @@ dependencies = [
  "form_urlencoded",
  "idna",
  "percent-encoding",
- "serde",
 ]
 
 [[package]]

@@ -4483,12 +4300,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "valuable"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
-
 [[package]]
 name = "vcpkg"
 version = "0.2.15"

@@ -4657,12 +4468,6 @@ dependencies = [
  "rustls-webpki 0.100.1",
 ]
 
-[[package]]
-name = "webpki-roots"
-version = "0.25.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
-
 [[package]]
 name = "whatlang"
 version = "0.16.2"

@@ -14,6 +14,7 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
 nom_locate = "4.1.0"
+unescaper = "0.1.2"
 
 [dev-dependencies]
 insta = "1.29.0"

@@ -62,6 +62,7 @@ pub enum ErrorKind<'a> {
     MisusedGeoRadius,
     MisusedGeoBoundingBox,
     InvalidPrimary,
+    InvalidEscapedNumber,
     ExpectedEof,
     ExpectedValue(ExpectedValueKind),
     MalformedValue,

@@ -147,6 +148,9 @@ impl<'a> Display for Error<'a> {
             let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
             writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
         }
+        ErrorKind::InvalidEscapedNumber => {
+            writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
+        }
         ErrorKind::ExpectedEof => {
             writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
         }

@@ -545,6 +545,8 @@ impl<'a> std::fmt::Display for Token<'a> {
 
 #[cfg(test)]
 pub mod tests {
+    use FilterCondition as Fc;
+
     use super::*;
 
     /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element

@@ -556,14 +558,22 @@ pub mod tests {
         unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
     }
 
+    fn p(s: &str) -> impl std::fmt::Display + '_ {
+        Fc::parse(s).unwrap().unwrap()
+    }
+
+    #[test]
+    fn parse_escaped() {
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
+        // but it also works with other sequencies
+        insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
+    }
+
     #[test]
     fn parse() {
-        use FilterCondition as Fc;
-
-        fn p(s: &str) -> impl std::fmt::Display + '_ {
-            Fc::parse(s).unwrap().unwrap()
-        }
-
         // Test equal
         insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
         insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");

@@ -171,7 +171,24 @@ pub fn parse_value(input: Span) -> IResult<Token> {
         })
     })?;
 
-    Ok((input, value))
+    match unescaper::unescape(value.value()) {
+        Ok(content) => {
+            if content.len() != value.value().len() {
+                Ok((input, Token::new(value.original_span(), Some(content))))
+            } else {
+                Ok((input, value))
+            }
+        }
+        Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
+        Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::InvalidEscapedNumber,
+        ))),
+        Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::MalformedValue,
+        ))),
+    }
 }
 
 fn is_value_component(c: char) -> bool {

@@ -318,17 +335,17 @@ pub mod test {
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
-           (r#""\\""#, r#"\\"#, false),
-           (r#""\\\\\\""#, r#"\\\\\\"#, false),
-           (r#""aa\\aa""#, r#"aa\\aa"#, false),
+           (r#""\\""#, r#"\"#, true),
+           (r#""\\\\\\""#, r#"\\\"#, true),
+           (r#""aa\\aa""#, r#"aa\aa"#, true),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
-           (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
+           (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
-           (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
+           (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];

@@ -350,7 +367,14 @@ pub mod test {
                 "Filter `{}` was not supposed to be escaped",
                 input
             );
-            assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
+            assert_eq!(
+                token.value(),
+                expected,
+                "Filter `{}` failed by giving `{}` instead of `{}`.",
+                input,
+                token.value(),
+                expected
+            );
         }
     }

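The hunk above routes every parsed filter value through `unescaper::unescape` and maps its three error variants onto parser errors. A minimal sketch (not part of the diff) of the behavior this relies on, assuming the `unescaper` crate at version 0.1.2 as pinned above; the exact cases come from the new `parse_escaped` test:

```rust
fn main() {
    // A doubled backslash collapses to a single one.
    assert_eq!(unescaper::unescape(r"foo\\").unwrap(), r"foo\");
    // `\xNN` hex escapes decode to the corresponding character.
    assert_eq!(unescaper::unescape(r"foo\x20bar").unwrap(), "foo bar");
    // When nothing was escaped, output length equals input length, which is
    // exactly the cheap "did anything change?" check used in the hunk before
    // allocating a new Token.
    assert_eq!(unescaper::unescape("foo").unwrap(), "foo");
}
```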
@@ -67,10 +67,6 @@ pub(crate) enum Batch {
         op: IndexOperation,
         must_create_index: bool,
     },
-    IndexDocumentDeletionByFilter {
-        index_uid: String,
-        task: Task,
-    },
     IndexCreation {
         index_uid: String,
         primary_key: Option<String>,

@@ -114,6 +110,10 @@ pub(crate) enum IndexOperation {
         documents: Vec<Vec<String>>,
         tasks: Vec<Task>,
     },
+    IndexDocumentDeletionByFilter {
+        index_uid: String,
+        task: Task,
+    },
     DocumentClear {
         index_uid: String,
         tasks: Vec<Task>,

@@ -155,7 +155,6 @@ impl Batch {
             | Batch::TaskDeletion(task)
             | Batch::Dump(task)
             | Batch::IndexCreation { task, .. }
-            | Batch::IndexDocumentDeletionByFilter { task, .. }
             | Batch::IndexUpdate { task, .. } => vec![task.uid],
             Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
                 tasks.iter().map(|task| task.uid).collect()

@@ -167,6 +166,7 @@ impl Batch {
                 | IndexOperation::DocumentClear { tasks, .. } => {
                     tasks.iter().map(|task| task.uid).collect()
                 }
+                IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
                 IndexOperation::SettingsAndDocumentOperation {
                     document_import_tasks: tasks,
                     settings_tasks: other,

@@ -194,8 +194,7 @@ impl Batch {
             IndexOperation { op, .. } => Some(op.index_uid()),
             IndexCreation { index_uid, .. }
             | IndexUpdate { index_uid, .. }
-            | IndexDeletion { index_uid, .. }
-            | IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
+            | IndexDeletion { index_uid, .. } => Some(index_uid),
         }
     }
 }

@@ -205,6 +204,7 @@ impl IndexOperation {
         match self {
             IndexOperation::DocumentOperation { index_uid, .. }
             | IndexOperation::DocumentDeletion { index_uid, .. }
+            | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
             | IndexOperation::DocumentClear { index_uid, .. }
             | IndexOperation::Settings { index_uid, .. }
             | IndexOperation::DocumentClearAndSetting { index_uid, .. }

@@ -239,9 +239,12 @@ impl IndexScheduler {
                 let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
                 match &task.kind {
                     KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
-                        Ok(Some(Batch::IndexDocumentDeletionByFilter {
-                            index_uid: index_uid.clone(),
-                            task,
+                        Ok(Some(Batch::IndexOperation {
+                            op: IndexOperation::IndexDocumentDeletionByFilter {
+                                index_uid: index_uid.clone(),
+                                task,
+                            },
+                            must_create_index: false,
                         }))
                     }
                     _ => unreachable!(),

@@ -536,7 +539,9 @@ impl IndexScheduler {
         let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
 
         // If autobatching is disabled we only take one task at a time.
-        let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
+        // Otherwise, we take only a maximum of tasks to create batches.
+        let tasks_limit =
+            if self.autobatching_enabled { self.maximum_number_of_batched_tasks } else { 1 };
 
         let enqueued = index_tasks
             .into_iter()

@@ -896,51 +901,6 @@ impl IndexScheduler {
 
                 Ok(tasks)
             }
-            Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
-                let (index_uid, filter) =
-                    if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
-                        &task.kind
-                    {
-                        (index_uid, filter_expr)
-                    } else {
-                        unreachable!()
-                    };
-                let index = {
-                    let rtxn = self.env.read_txn()?;
-                    self.index_mapper.index(&rtxn, index_uid)?
-                };
-                let deleted_documents = delete_document_by_filter(filter, index);
-                let original_filter = if let Some(Details::DocumentDeletionByFilter {
-                    original_filter,
-                    deleted_documents: _,
-                }) = task.details
-                {
-                    original_filter
-                } else {
-                    // In the case of a `documentDeleteByFilter` the details MUST be set
-                    unreachable!();
-                };
-
-                match deleted_documents {
-                    Ok(deleted_documents) => {
-                        task.status = Status::Succeeded;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(deleted_documents),
-                        });
-                    }
-                    Err(e) => {
-                        task.status = Status::Failed;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(0),
-                        });
-                        task.error = Some(e.into());
-                    }
-                }
-
-                Ok(vec![task])
-            }
             Batch::IndexCreation { index_uid, primary_key, task } => {
                 let wtxn = self.env.write_txn()?;
                 if self.index_mapper.exists(&wtxn, &index_uid)? {

@@ -1299,6 +1259,47 @@ impl IndexScheduler {
 
                 Ok(tasks)
             }
+            IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
+                let filter =
+                    if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
+                        &task.kind
+                    {
+                        filter_expr
+                    } else {
+                        unreachable!()
+                    };
+                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
+                let original_filter = if let Some(Details::DocumentDeletionByFilter {
+                    original_filter,
+                    deleted_documents: _,
+                }) = task.details
+                {
+                    original_filter
+                } else {
+                    // In the case of a `documentDeleteByFilter` the details MUST be set
+                    unreachable!();
+                };
+
+                match deleted_documents {
+                    Ok(deleted_documents) => {
+                        task.status = Status::Succeeded;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(deleted_documents),
+                        });
+                    }
+                    Err(e) => {
+                        task.status = Status::Failed;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(0),
+                        });
+                        task.error = Some(e.into());
+                    }
+                }
+
+                Ok(vec![task])
+            }
             IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
                 let indexer_config = self.index_mapper.indexer_config();
                 let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);

@@ -1498,23 +1499,22 @@ impl IndexScheduler {
     }
 }
 
-fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
+fn delete_document_by_filter<'a>(
+    wtxn: &mut RwTxn<'a, '_>,
+    filter: &serde_json::Value,
+    index: &'a Index,
+) -> Result<u64> {
     let filter = Filter::from_json(filter)?;
     Ok(if let Some(filter) = filter {
-        let mut wtxn = index.write_txn()?;
-
-        let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
+        let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
             milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                 Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
             }
             e => e.into(),
         })?;
-        let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
+        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
         delete_operation.delete_documents(&candidates);
-        let deleted_documents =
-            delete_operation.execute().map(|result| result.deleted_documents)?;
-        wtxn.commit()?;
-        deleted_documents
+        delete_operation.execute().map(|result| result.deleted_documents)?
    } else {
        0
    })

@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
 
     let IndexScheduler {
         autobatching_enabled,
+        maximum_number_of_batched_tasks: _,
         must_stop_processing: _,
         processing_tasks,
         file_store,

@@ -253,6 +253,9 @@ pub struct IndexSchedulerOptions {
     /// Set to `true` iff the index scheduler is allowed to automatically
     /// batch tasks together, to process multiple tasks at once.
     pub autobatching_enabled: bool,
+    /// If the autobatcher is allowed to automatically batch tasks
+    /// it will only batch this defined number of tasks at once.
+    pub maximum_number_of_batched_tasks: usize,
     /// The maximum number of tasks stored in the task queue before starting
     /// to auto schedule task deletions.
     pub max_number_of_tasks: usize,

@@ -310,6 +313,9 @@ pub struct IndexScheduler {
     /// Whether auto-batching is enabled or not.
     pub(crate) autobatching_enabled: bool,
 
+    /// The maximum number of tasks that will be batched together.
+    pub(crate) maximum_number_of_batched_tasks: usize,
+
     /// The max number of tasks allowed before the scheduler starts to delete
     /// the finished tasks automatically.
     pub(crate) max_number_of_tasks: usize,

@@ -363,6 +369,7 @@ impl IndexScheduler {
             index_mapper: self.index_mapper.clone(),
             wake_up: self.wake_up.clone(),
             autobatching_enabled: self.autobatching_enabled,
+            maximum_number_of_batched_tasks: self.maximum_number_of_batched_tasks,
             max_number_of_tasks: self.max_number_of_tasks,
             snapshots_path: self.snapshots_path.clone(),
             dumps_path: self.dumps_path.clone(),

@@ -458,6 +465,7 @@ impl IndexScheduler {
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
             autobatching_enabled: options.autobatching_enabled,
+            maximum_number_of_batched_tasks: options.maximum_number_of_batched_tasks,
             max_number_of_tasks: options.max_number_of_tasks,
             dumps_path: options.dumps_path,
             snapshots_path: options.snapshots_path,

@@ -1589,6 +1597,7 @@ mod tests {
             index_count: 5,
             indexer_config,
             autobatching_enabled: true,
+            maximum_number_of_batched_tasks: usize::MAX,
             max_number_of_tasks: 1_000_000,
             instance_features: Default::default(),
         };

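Taken together, these hunks thread a new `maximum_number_of_batched_tasks` knob from the scheduler options down to batch creation. A minimal sketch of the selection rule (it mirrors the `tasks_limit` change in the scheduler hunk earlier; the free function here is illustrative, not a public API):

```rust
// With autobatching enabled, drain at most `maximum_number_of_batched_tasks`
// enqueued tasks into one batch; with it disabled, exactly one.
fn tasks_limit(autobatching_enabled: bool, maximum_number_of_batched_tasks: usize) -> usize {
    if autobatching_enabled {
        maximum_number_of_batched_tasks
    } else {
        1
    }
}

fn main() {
    // The default (`usize::MAX`) keeps the previous unlimited behavior.
    assert_eq!(tasks_limit(true, usize::MAX), usize::MAX);
    // With autobatching disabled, one task per batch, as before.
    assert_eq!(tasks_limit(false, usize::MAX), 1);
    // The new experimental option caps the batch size.
    assert_eq!(tasks_limit(true, 100), 100);
}
```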
@@ -83,16 +83,6 @@ rustls-pemfile = "1.0.2"
 segment = { version = "0.2.2", optional = true }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
-sentry = { version = "0.31.6", default-features = false, features = [
-    "backtrace",
-    "contexts",
-    "debug-images",
-    "panic",
-    "reqwest",
-    "rustls",
-] }
-sentry-actix = "0.31.6"
-serde_urlencoded = "0.7.1"
 sha2 = "0.10.6"
 siphasher = "0.3.10"
 slice-group-by = "0.3.0"

@@ -100,7 +90,6 @@ static-files = { version = "0.2.3", optional = true }
 sysinfo = "0.29.7"
 tar = "0.4.38"
 tempfile = "3.5.0"
-termcolor = "1.2.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = [
     "serde-well-known",

@@ -114,6 +103,8 @@ toml = "0.7.3"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }
 walkdir = "2.3.3"
 yaup = "0.2.1"
+serde_urlencoded = "0.7.1"
+termcolor = "1.2.0"
 
 [dev-dependencies]
 actix-rt = "2.8.0"

@@ -142,10 +133,20 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.4", optional = true }
 
 [features]
-default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
+default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard", "profile-with-puffin"]
 analytics = ["segment"]
-mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
+profile-with-puffin = ["dep:puffin_http"]
+mini-dashboard = [
+    "actix-web-static-files",
+    "static-files",
+    "anyhow",
+    "cargo_toml",
+    "hex",
+    "reqwest",
+    "sha-1",
+    "tempfile",
+    "zip",
+]
 chinese = ["meilisearch-types/chinese"]
 hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]

@@ -285,6 +285,7 @@ impl From<Opt> for Infos {
             db_path,
             experimental_enable_metrics,
             experimental_reduce_indexing_memory_usage,
+            experimental_limit_batched_tasks: _,
             http_addr,
             master_key: _,
             env,

@@ -101,7 +101,7 @@ pub fn create_app(
         InitError = (),
     >,
 > {
-    actix_web::App::new()
+    let app = actix_web::App::new()
         .configure(|s| {
             configure_data(
                 s,

@@ -112,23 +112,23 @@ pub fn create_app(
             )
         })
         .configure(routes::configure)
-        .configure(|s| dashboard(s, enable_dashboard))
-        .wrap(sentry_actix::Sentry::new())
-        .wrap(actix_web::middleware::Condition::new(
-            opt.experimental_enable_metrics,
-            middleware::RouteMetrics,
-        ))
-        .wrap(
-            Cors::default()
-                .send_wildcard()
-                .allow_any_header()
-                .allow_any_origin()
-                .allow_any_method()
-                .max_age(86_400), // 24h
-        )
-        .wrap(actix_web::middleware::Logger::default())
-        .wrap(actix_web::middleware::Compress::default())
-        .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
+        .configure(|s| dashboard(s, enable_dashboard));
+
+    let app = app.wrap(actix_web::middleware::Condition::new(
+        opt.experimental_enable_metrics,
+        middleware::RouteMetrics,
+    ));
+    app.wrap(
+        Cors::default()
+            .send_wildcard()
+            .allow_any_header()
+            .allow_any_origin()
+            .allow_any_method()
+            .max_age(86_400), // 24h
+    )
+    .wrap(actix_web::middleware::Logger::default())
+    .wrap(actix_web::middleware::Compress::default())
+    .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
 }
 
 enum OnFailure {

@@ -236,6 +236,7 @@ fn open_or_create_database_unchecked(
         enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
         indexer_config: (&opt.indexer_options).try_into()?,
         autobatching_enabled: true,
+        maximum_number_of_batched_tasks: opt.experimental_limit_batched_tasks,
         max_number_of_tasks: 1_000_000,
         index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
         index_count: DEFAULT_INDEX_COUNT,

@@ -30,13 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;
 
-    let _sentry = sentry::init(sentry::ClientOptions {
-        release: sentry::release_name!(),
-        session_mode: sentry::SessionMode::Request,
-        auto_session_tracking: true,
-        ..Default::default()
-    });
-
     #[cfg(feature = "profile-with-puffin")]
     let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
     puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

@@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
     "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
+const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS";
 
 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";

@@ -301,6 +302,11 @@ pub struct Opt {
     #[serde(default)]
     pub experimental_reduce_indexing_memory_usage: bool,
 
+    /// Experimental limit to the number of tasks per batch
+    #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
+    #[serde(default = "default_limit_batched_tasks")]
+    pub experimental_limit_batched_tasks: usize,
+
     #[serde(flatten)]
     #[clap(flatten)]
     pub indexer_options: IndexerOpts,

@@ -393,7 +399,8 @@ impl Opt {
             #[cfg(all(not(debug_assertions), feature = "analytics"))]
             no_analytics,
             experimental_enable_metrics: enable_metrics_route,
-            experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
+            experimental_reduce_indexing_memory_usage,
+            experimental_limit_batched_tasks,
         } = self;
         export_to_env_if_not_present(MEILI_DB_PATH, db_path);
         export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);

@@ -437,7 +444,11 @@ impl Opt {
         );
         export_to_env_if_not_present(
             MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
-            reduce_indexing_memory_usage.to_string(),
+            experimental_reduce_indexing_memory_usage.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS,
+            experimental_limit_batched_tasks.to_string(),
         );
         indexer_options.export_to_env();
     }

@@ -739,6 +750,10 @@ fn default_dump_dir() -> PathBuf {
     PathBuf::from(DEFAULT_DUMP_DIR)
 }
 
+fn default_limit_batched_tasks() -> usize {
+    usize::MAX
+}
+
 /// Indicates if a snapshot was scheduled, and if yes with which interval.
 #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
 pub enum ScheduleSnapshot {

@@ -60,8 +60,7 @@ pub async fn swap_indexes(
     }
 
     let task = KindWithContent::IndexSwap { swaps };
-
-    let task = index_scheduler.register(task)?;
-    let task: SummarizedTaskView = task.into();
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
     Ok(HttpResponse::Accepted().json(task))
 }

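The change above moves `index_scheduler.register(task)` into `tokio::task::spawn_blocking`, so that a blocking write does not stall the async executor. A reduced sketch of the pattern under that assumption; the closure body here is a stand-in, not Meilisearch's API:

```rust
// Run a blocking operation off the async runtime, then await its result.
// spawn_blocking returns a JoinHandle, hence the double `?` seen in the
// diff: the first unwraps the join result, the second the operation's own.
async fn register_off_executor() -> Result<u64, Box<dyn std::error::Error>> {
    let task_uid = tokio::task::spawn_blocking(|| {
        // stand-in for `index_scheduler.register(task)`, which performs a
        // blocking LMDB write
        Ok::<u64, std::io::Error>(42)
    })
    .await??;
    Ok(task_uid)
}
```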
@@ -154,6 +154,19 @@ async fn delete_document_by_filter() {
     )
     .await;
     index.wait_task(1).await;
 
+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 4,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 3,
+        "id": 4
+      }
+    }
+    "###);
+
     let (response, code) =
         index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
     snapshot!(code, @"202 Accepted");

@@ -188,6 +201,18 @@ async fn delete_document_by_filter() {
     }
     "###);
 
+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 1,
+        "id": 2
+      }
+    }
+    "###);
+
     let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
     snapshot!(code, @"200 OK");
     snapshot!(json_string!(documents), @r###"

@@ -241,6 +266,18 @@ async fn delete_document_by_filter() {
     }
     "###);
 
+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 1,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 1,
+        "id": 1
+      }
+    }
+    "###);
+
     let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
     snapshot!(code, @"200 OK");
     snapshot!(json_string!(documents), @r###"

@@ -1104,3 +1104,59 @@ async fn camelcased_words() {
         })
         .await;
 }
+
+#[actix_rt::test]
+async fn simple_search_with_strange_synonyms() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
+    let r = index.wait_task(0).await;
+    meili_snap::snapshot!(r["status"], @r###""succeeded""###);
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "How to train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "How & train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "to"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+}

@@ -17,7 +17,8 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+# charabia = { version = "0.8.3", default-features = false }
+charabia = { git = "https://github.com/meilisearch/charabia", branch = "main", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}

@@ -418,19 +418,11 @@ impl<'t> Matcher<'t, '_> {
         } else {
             match &self.matches {
                 Some((tokens, matches)) => {
-                    // If the text has to be cropped,
-                    // compute the best interval to crop around.
-                    let matches = match format_options.crop {
-                        Some(crop_size) if crop_size > 0 => {
-                            self.find_best_match_interval(matches, crop_size)
-                        }
-                        _ => matches,
-                    };
-
                     // If the text has to be cropped,
                     // crop around the best interval.
                     let (byte_start, byte_end) = match format_options.crop {
                         Some(crop_size) if crop_size > 0 => {
+                            let matches = self.find_best_match_interval(matches, crop_size);
                             self.crop_bounds(tokens, matches, crop_size)
                         }
                         _ => (0, self.text.len()),

@@ -450,6 +442,11 @@ impl<'t> Matcher<'t, '_> {
                     for m in matches {
                         let token = &tokens[m.token_position];
 
+                        // skip matches out of the crop window.
+                        if token.byte_start < byte_start || token.byte_end > byte_end {
+                            continue;
+                        }
+
                         if byte_index < token.byte_start {
                             formatted.push(&self.text[byte_index..token.byte_start]);
                         }

@@ -800,6 +797,37 @@ mod tests {
         );
     }
 
+    #[test]
+    fn format_highlight_crop_phrase_query() {
+        //! testing: https://github.com/meilisearch/meilisearch/issues/3975
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
+            ]))
+            .unwrap();
+        let rtxn = temp_index.read_txn().unwrap();
+
+        let format_options = FormatOptions { highlight: true, crop: Some(10) };
+        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
+
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
+        let mut matcher = builder.build(text);
+        // should return 10 words with a marker at the start as well the end, and the highlighted matches.
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…had the power to split <em>the</em> <em>world</em> between those who…"
+        );
+
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
+        let mut matcher = builder.build(text);
+        // should highlight "those" and the phrase "and those".
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
+        );
+    }
+
     #[test]
     fn smaller_crop_size() {
         //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295

@@ -226,9 +226,9 @@ fn process_tokens<'a>(
 ) -> impl Iterator<Item = (usize, Token<'a>)> {
     tokens
         .skip_while(|token| token.is_separator())
-        .scan((0, None), |(offset, prev_kind), token| {
+        .scan((0, None), |(offset, prev_kind), mut token| {
             match token.kind {
-                TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
+                TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
                     *offset += match *prev_kind {
                         Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
                         Some(_) => 1,

@@ -244,7 +244,7 @@ fn process_tokens<'a>(
                 {
                     *prev_kind = Some(token.kind);
                 }
-                _ => (),
+                _ => token.kind = TokenKind::Unknown,
             }
             Some((*offset, token))
         })

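The rewrite above stops treating empty-lemma tokens as words: the match arm now guards on `!token.lemma().is_empty()`, and the fallback arm downgrades everything else to `TokenKind::Unknown`. A stripped-down sketch of that guard, over plain strings rather than charabia tokens (the types here are illustrative):

```rust
// Only a non-empty lemma still counts as an indexable word; anything else
// is treated as unknown and never advances the word-position counters.
#[derive(Debug, PartialEq)]
enum Kind {
    Word,
    Unknown,
}

fn classify(lemma: &str) -> Kind {
    if lemma.is_empty() {
        Kind::Unknown
    } else {
        Kind::Word
    }
}

fn main() {
    assert_eq!(classify("dragon"), Kind::Word);
    // A token normalized away to nothing is skipped instead of indexed.
    assert_eq!(classify(""), Kind::Unknown);
}
```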
@@ -59,7 +59,13 @@ pub(crate) fn data_from_obkv_documents(
         original_obkv_chunks
             .par_bridge()
             .map(|original_documents_chunk| {
-                send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
+                send_original_documents_data(
+                    original_documents_chunk,
+                    indexer,
+                    lmdb_writer_sx.clone(),
+                    vectors_field_id,
+                    primary_key_id,
+                )
             })
             .collect::<Result<()>>()?;

@@ -76,7 +82,6 @@ pub(crate) fn data_from_obkv_documents(
                 &faceted_fields,
                 primary_key_id,
                 geo_fields_ids,
-                vectors_field_id,
                 &stop_words,
                 &allowed_separators,
                 &dictionary,

@@ -265,11 +270,33 @@ fn spawn_extraction_task<FE, FS, M>(
 /// - documents
 fn send_original_documents_data(
     original_documents_chunk: Result<grenad::Reader<File>>,
+    indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
+    vectors_field_id: Option<FieldId>,
+    primary_key_id: FieldId,
 ) -> Result<()> {
     let original_documents_chunk =
         original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
 
+    if let Some(vectors_field_id) = vectors_field_id {
+        let documents_chunk_cloned = original_documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result = extract_vector_points(
+                documents_chunk_cloned,
+                indexer,
+                primary_key_id,
+                vectors_field_id,
+            );
+            let _ = match result {
+                Ok(vector_points) => {
+                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
+                }
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
     // TODO: create a custom internal error
     lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
     Ok(())

@@ -291,7 +318,6 @@ fn send_and_extract_flattened_documents_data(
     faceted_fields: &HashSet<FieldId>,
     primary_key_id: FieldId,
     geo_fields_ids: Option<(FieldId, FieldId)>,
-    vectors_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<&[u8]>>,
     allowed_separators: &Option<&[&str]>,
     dictionary: &Option<&[&str]>,

@@ -322,25 +348,6 @@ fn send_and_extract_flattened_documents_data(
         });
     }
 
-    if let Some(vectors_field_id) = vectors_field_id {
-        let documents_chunk_cloned = flattened_documents_chunk.clone();
-        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            let result = extract_vector_points(
-                documents_chunk_cloned,
-                indexer,
-                primary_key_id,
-                vectors_field_id,
-            );
-            let _ = match result {
-                Ok(vector_points) => {
-                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
-                }
-                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
-            };
-        });
-    }
-
     let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
         rayon::join(
             || {

@@ -2550,6 +2550,25 @@
         db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
     }
 
+    /// Index multiple different number of vectors in documents.
+    /// Vectors must be of the same length.
+    #[test]
+    fn test_multiple_vectors() {
+        let index = TempIndex::new();
+
+        index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
+        index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
+        index
+            .add_documents(
+                documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
+            )
+            .unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
+        assert_eq!(res.documents_ids.len(), 3);
+    }
+
     #[test]
     fn reproduce_the_bug() {
         /*

@@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
             tokenizer
                 .tokenize(text)
                 .filter_map(|token| {
-                    if token.is_word() {
+                    if token.is_word() && !token.lemma().is_empty() {
                         Some(token.lemma().to_string())
                     } else {
                         None

@@ -608,13 +608,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
             for (word, synonyms) in user_synonyms {
                 // Normalize both the word and associated synonyms.
                 let normalized_word = normalize(&tokenizer, word);
-                let normalized_synonyms =
-                    synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));
+                let normalized_synonyms: Vec<_> = synonyms
+                    .iter()
+                    .map(|synonym| normalize(&tokenizer, synonym))
+                    .filter(|synonym| !synonym.is_empty())
+                    .collect();
 
                 // Store the normalized synonyms under the normalized word,
                 // merging the possible duplicate words.
-                let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
-                entry.extend(normalized_synonyms);
+                if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
+                    let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
+                    entry.extend(normalized_synonyms.into_iter());
+                }
             }
 
             // Make sure that we don't have duplicate synonyms.

@@ -1422,6 +1427,43 @@ mod tests {
         assert!(result.documents_ids.is_empty());
     }
 
+    #[test]
+    fn thai_synonyms() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.autogenerate_docids = true;
+
+        let mut wtxn = index.write_txn().unwrap();
+        // Send 3 documents with ids from 1 to 3.
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "name": "ยี่ปุ่น" },
+                    { "name": "ญี่ปุ่น" },
+                ]),
+            )
+            .unwrap();
+
+        // In the same transaction provide some synonyms
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_synonyms(btreemap! {
+                    "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
+                });
+            })
+            .unwrap();
+        wtxn.commit().unwrap();
+
+        // Ensure synonyms are effectively stored
+        let rtxn = index.read_txn().unwrap();
+        let synonyms = index.synonyms(&rtxn).unwrap();
+        assert!(!synonyms.is_empty()); // at this point the index should return something
+
+        // Check that we can use synonyms
+        let result = index.search(&rtxn).query("japanese").execute().unwrap();
+        assert_eq!(result.documents_ids.len(), 2);
+    }
+
     #[test]
     fn setting_searchable_recomputes_other_settings() {
         let index = TempIndex::new();

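Both settings hunks above guard against empty normalizations: a synonym, or the word itself, that normalizes to the empty string is now dropped instead of stored. A small self-contained sketch of the filtering rule (the map and function names here are stand-ins for the diff's `new_synonyms` logic):

```rust
use std::collections::BTreeMap;

// Keep an entry only when the normalized word and at least one normalized
// synonym survive as non-empty strings, merging duplicates as in the diff.
fn insert_synonyms(
    map: &mut BTreeMap<String, Vec<String>>,
    normalized_word: String,
    normalized_synonyms: Vec<String>,
) {
    let synonyms: Vec<String> =
        normalized_synonyms.into_iter().filter(|s| !s.is_empty()).collect();
    if !normalized_word.is_empty() && !synonyms.is_empty() {
        map.entry(normalized_word).or_insert_with(Vec::new).extend(synonyms);
    }
}

fn main() {
    let mut map = BTreeMap::new();
    // The empty synonym is filtered out; the entry survives.
    insert_synonyms(&mut map, "japanese".into(), vec!["ญี่ปุ่น".into(), String::new()]);
    // A word that normalizes to nothing produces no entry at all.
    insert_synonyms(&mut map, String::new(), vec!["dropped".into()]);
    assert_eq!(map.len(), 1);
    assert_eq!(map["japanese"], vec!["ญี่ปุ่น".to_string()]);
}
```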
@@ -186,12 +186,16 @@ fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
                 let array = create_array(array, &sub_selectors);
                 if !array.is_empty() {
                     new_value.insert(key.to_string(), array.into());
+                } else {
+                    new_value.insert(key.to_string(), Value::Array(vec![]));
                 }
             }
             Value::Object(object) => {
                 let object = create_value(object, sub_selectors);
                 if !object.is_empty() {
                     new_value.insert(key.to_string(), object.into());
+                } else {
+                    new_value.insert(key.to_string(), Value::Object(Map::new()));
                 }
             }
             _ => (),

@@ -211,6 +215,8 @@ fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
                 let array = create_array(array, selectors);
                 if !array.is_empty() {
                     res.push(array.into());
+                } else {
+                    res.push(Value::Array(vec![]));
                 }
             }
             Value::Object(object) => {

@@ -637,6 +643,24 @@ mod tests {
         );
     }
 
+    #[test]
+    fn empty_array_object_return_empty() {
+        let value: Value = json!({
+            "array": [],
+            "object": {},
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["array.name", "object.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "array": [],
+                "object": {},
+            })
+        );
+    }
+
     #[test]
     fn all_conflict_variation() {
         let value: Value = json!({