Compare commits


1 commit

Author         SHA1        Message                              Date
Louis Dureuil  40215bfec3  TMP: check windows free disk space   2024-05-29 11:27:27 +02:00
42 changed files with 493 additions and 1725 deletions

View File

@@ -56,6 +56,12 @@ jobs:
       matrix:
         os: [macos-12, windows-2022]
     steps:
+      - name: Check free disk space on C
+        run: |
+          fsutil volume diskfree c:
+      - name: Check free disk space on D
+        run: |
+          fsutil volume diskfree d:
       - uses: actions/checkout@v3
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
@@ -63,11 +69,23 @@ jobs:
         with:
           toolchain: stable
           override: true
+      - name: Check free disk space on C
+        run: |
+          fsutil volume diskfree c:
+      - name: Check free disk space on D
+        run: |
+          fsutil volume diskfree d:
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
           command: build
           args: --locked --release --no-default-features --all
+      - name: Check free disk space on C
+        run: |
+          fsutil volume diskfree c:
+      - name: Check free disk space on D
+        run: |
+          fsutil volume diskfree d:
       - name: Run cargo test
         uses: actions-rs/cargo@v1
         with:
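The added steps only shell out to `fsutil volume diskfree` on the C: and D: drives before and after the expensive build steps, so the logs show how much space each step consumed. For local debugging on a Windows machine, roughly the same check can be reproduced outside CI; the sketch below is illustrative and not part of the commit, and only the command and drive letters are taken from the workflow:

use std::process::Command;

fn main() -> std::io::Result<()> {
    // Run the same command the added CI steps run (Windows only).
    for drive in ["c:", "d:"] {
        let output = Command::new("fsutil").args(["volume", "diskfree", drive]).output()?;
        println!("free disk space on {drive}");
        println!("{}", String::from_utf8_lossy(&output.stdout));
    }
    Ok(())
}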

Cargo.lock generated
View File

@@ -36,9 +36,9 @@ dependencies = [
 [[package]]
 name = "actix-http"
-version = "3.7.0"
+version = "3.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d"
+checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
 dependencies = [
  "actix-codec",
  "actix-rt",
@@ -46,7 +46,7 @@ dependencies = [
  "actix-tls",
  "actix-utils",
  "ahash",
- "base64 0.22.1",
+ "base64 0.21.7",
  "bitflags 2.5.0",
  "brotli",
  "bytes",
@@ -85,15 +85,13 @@ dependencies = [
 [[package]]
 name = "actix-router"
-version = "0.5.3"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
+checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799"
 dependencies = [
  "bytestring",
- "cfg-if",
  "http 0.2.11",
  "regex",
- "regex-lite",
  "serde",
  "tracing",
 ]
@@ -140,9 +138,9 @@ dependencies = [
 [[package]]
 name = "actix-tls"
-version = "3.4.0"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389"
+checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
 dependencies = [
  "actix-rt",
  "actix-service",
@@ -169,9 +167,9 @@ dependencies = [
 [[package]]
 name = "actix-web"
-version = "4.6.0"
+version = "4.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32"
+checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
 dependencies = [
  "actix-codec",
  "actix-http",
@@ -198,7 +196,7 @@ dependencies = [
  "mime",
  "once_cell",
  "pin-project-lite",
- "regex-lite",
+ "regex",
  "serde",
  "serde_json",
  "serde_urlencoded",
@@ -222,9 +220,8 @@ dependencies = [
 [[package]]
 name = "actix-web-static-files"
-version = "4.0.1"
+version = "3.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df"
-checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f"
 dependencies = [
  "actix-web",
  "derive_more",
@@ -503,7 +500,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 [[package]]
 name = "benchmarks"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -616,9 +613,9 @@ dependencies = [
 [[package]]
 name = "brotli"
-version = "6.0.0"
+version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b"
+checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
 dependencies = [
  "alloc-no-stdlib",
  "alloc-stdlib",
@@ -627,9 +624,9 @@ dependencies = [
 [[package]]
 name = "brotli-decompressor"
-version = "4.0.1"
+version = "2.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362"
+checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
 dependencies = [
  "alloc-no-stdlib",
  "alloc-stdlib",
@@ -648,7 +645,7 @@ dependencies = [
 [[package]]
 name = "build-info"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "time",
@@ -898,9 +895,9 @@ dependencies = [
 [[package]]
 name = "charabia"
-version = "0.8.11"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
+checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
 dependencies = [
  "aho-corasick",
  "cow-utils",
@@ -989,7 +986,7 @@ dependencies = [
  "anstream",
  "anstyle",
  "clap_lex",
- "strsim 0.10.0",
+ "strsim",
 ]
 [[package]]
@@ -1280,12 +1277,12 @@ dependencies = [
 [[package]]
 name = "darling"
-version = "0.20.9"
+version = "0.20.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
+checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
 dependencies = [
- "darling_core 0.20.9",
- "darling_macro 0.20.9",
+ "darling_core 0.20.3",
+ "darling_macro 0.20.3",
 ]
 [[package]]
@@ -1298,21 +1295,21 @@ dependencies = [
  "ident_case",
  "proc-macro2",
  "quote",
- "strsim 0.10.0",
+ "strsim",
  "syn 1.0.109",
 ]
 [[package]]
 name = "darling_core"
-version = "0.20.9"
+version = "0.20.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
+checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
 dependencies = [
  "fnv",
  "ident_case",
  "proc-macro2",
  "quote",
- "strsim 0.11.1",
+ "strsim",
  "syn 2.0.60",
 ]
@@ -1329,11 +1326,11 @@ dependencies = [
 [[package]]
 name = "darling_macro"
-version = "0.20.9"
+version = "0.20.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
+checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
 dependencies = [
- "darling_core 0.20.9",
+ "darling_core 0.20.3",
  "quote",
  "syn 2.0.60",
 ]
@@ -1386,15 +1383,6 @@ dependencies = [
  "derive_builder_macro 0.13.1",
 ]
-[[package]]
-name = "derive_builder"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
-dependencies = [
- "derive_builder_macro 0.20.0",
-]
 [[package]]
 name = "derive_builder_core"
 version = "0.12.0"
@@ -1419,18 +1407,6 @@ dependencies = [
  "syn 1.0.109",
 ]
-[[package]]
-name = "derive_builder_core"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
-dependencies = [
- "darling 0.20.9",
- "proc-macro2",
- "quote",
- "syn 2.0.60",
-]
 [[package]]
 name = "derive_builder_macro"
 version = "0.12.0"
@@ -1451,16 +1427,6 @@ dependencies = [
  "syn 1.0.109",
 ]
-[[package]]
-name = "derive_builder_macro"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
-dependencies = [
- "derive_builder_core 0.20.0",
- "syn 2.0.60",
-]
 [[package]]
 name = "derive_more"
 version = "0.99.17"
@@ -1488,7 +1454,7 @@ dependencies = [
  "serde-cs",
  "serde_json",
  "serde_urlencoded",
- "strsim 0.10.0",
+ "strsim",
 ]
 [[package]]
@@ -1579,7 +1545,7 @@ dependencies = [
 [[package]]
 name = "dump"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1741,6 +1707,29 @@ dependencies = [
  "syn 2.0.60",
 ]
+[[package]]
+name = "env_filter"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
+dependencies = [
+ "log",
+ "regex",
+]
+[[package]]
+name = "env_logger"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "env_filter",
+ "humantime",
+ "log",
+]
 [[package]]
 name = "equivalent"
 version = "1.0.1"
@@ -1795,7 +1784,7 @@ version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
 dependencies = [
- "darling 0.20.9",
+ "darling 0.20.3",
  "proc-macro2",
  "quote",
  "syn 2.0.60",
@@ -1804,7 +1793,7 @@ dependencies = [
 [[package]]
 name = "file-store"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "faux",
  "tempfile",
@@ -1827,7 +1816,7 @@ dependencies = [
 [[package]]
 name = "filter-parser"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "insta",
  "nom",
@@ -1847,7 +1836,7 @@ dependencies = [
 [[package]]
 name = "flatten-serde-json"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1965,7 +1954,7 @@ dependencies = [
 [[package]]
 name = "fuzzers"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2390,6 +2379,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 [[package]]
 name = "hyper"
 version = "0.14.27"
@@ -2452,7 +2447,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
 [[package]]
 name = "index-scheduler"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2647,7 +2642,7 @@ dependencies = [
 [[package]]
 name = "json-depth-checker"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2783,9 +2778,9 @@ dependencies = [
 [[package]]
 name = "lindera"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
+checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88"
 dependencies = [
  "lindera-analyzer",
  "lindera-core",
@@ -2796,9 +2791,9 @@ dependencies = [
 [[package]]
 name = "lindera-analyzer"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
+checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2826,9 +2821,9 @@ dependencies = [
 [[package]]
 name = "lindera-cc-cedict"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
+checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d"
 dependencies = [
  "bincode",
  "byteorder",
@@ -2840,21 +2835,29 @@ dependencies = [
 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
+checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683"
 dependencies = [
  "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding",
+ "env_logger",
+ "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
- "lindera-dictionary-builder",
+ "log",
+ "yada",
 ]
 [[package]]
 name = "lindera-compress"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
+checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500"
 dependencies = [
  "anyhow",
  "flate2",
@@ -2863,9 +2866,9 @@ dependencies = [
 [[package]]
 name = "lindera-core"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
+checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2880,9 +2883,9 @@ dependencies = [
 [[package]]
 name = "lindera-decompress"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
+checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e"
 dependencies = [
  "anyhow",
  "flate2",
@@ -2891,9 +2894,9 @@ dependencies = [
 [[package]]
 name = "lindera-dictionary"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
+checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2914,33 +2917,11 @@ dependencies = [
  "strum_macros",
 ]
-[[package]]
-name = "lindera-dictionary-builder"
-version = "0.31.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
-dependencies = [
- "anyhow",
- "bincode",
- "byteorder",
- "csv",
- "derive_builder 0.20.0",
- "encoding",
- "encoding_rs",
- "encoding_rs_io",
- "glob",
- "lindera-compress",
- "lindera-core",
- "lindera-decompress",
- "log",
- "yada",
-]
 [[package]]
 name = "lindera-filter"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
+checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d"
 dependencies = [
  "anyhow",
  "csv",
@@ -2963,9 +2944,9 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
+checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25"
 dependencies = [
  "bincode",
  "byteorder",
@@ -2977,21 +2958,31 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
+checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90"
 dependencies = [
  "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding_rs",
+ "encoding_rs_io",
+ "env_logger",
+ "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
- "lindera-dictionary-builder",
+ "log",
+ "serde",
+ "yada",
 ]
 [[package]]
 name = "lindera-ipadic-neologd"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
+checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3003,21 +2994,31 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic-neologd-builder"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
+checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20"
 dependencies = [
  "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding_rs",
+ "encoding_rs_io",
+ "env_logger",
+ "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
- "lindera-dictionary-builder",
+ "log",
+ "serde",
+ "yada",
 ]
 [[package]]
 name = "lindera-ko-dic"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
+checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3033,21 +3034,29 @@ dependencies = [
 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
+checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577"
 dependencies = [
  "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding",
+ "env_logger",
+ "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
- "lindera-dictionary-builder",
+ "log",
+ "yada",
 ]
 [[package]]
 name = "lindera-tokenizer"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
+checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d"
 dependencies = [
  "bincode",
  "lindera-core",
@@ -3059,9 +3068,9 @@ dependencies = [
 [[package]]
 name = "lindera-unidic"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
+checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3077,14 +3086,22 @@ dependencies = [
 [[package]]
 name = "lindera-unidic-builder"
-version = "0.31.0"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
+checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008"
 dependencies = [
  "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding",
+ "env_logger",
+ "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
- "lindera-dictionary-builder",
+ "log",
+ "yada",
 ]
 [[package]]
@@ -3255,7 +3272,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 [[package]]
 name = "meili-snap"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "insta",
  "md5",
@@ -3264,7 +3281,7 @@ dependencies = [
 [[package]]
 name = "meilisearch"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3356,7 +3373,7 @@ dependencies = [
 [[package]]
 name = "meilisearch-auth"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "base64 0.21.7",
  "enum-iterator",
@@ -3375,7 +3392,7 @@ dependencies = [
 [[package]]
 name = "meilisearch-types"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3405,7 +3422,7 @@ dependencies = [
 [[package]]
 name = "meilitool"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "clap",
@@ -3444,7 +3461,7 @@ dependencies = [
 [[package]]
 name = "milli"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "arroy",
  "big_s",
@@ -3884,7 +3901,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 [[package]]
 name = "permissive-json-pointer"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4323,12 +4340,6 @@ dependencies = [
  "regex-syntax",
 ]
-[[package]]
-name = "regex-lite"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
 [[package]]
 name = "regex-syntax"
 version = "0.8.2"
@@ -4889,12 +4900,6 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
-[[package]]
-name = "strsim"
-version = "0.11.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 [[package]]
 name = "strum"
 version = "0.26.2"
@@ -5308,9 +5313,9 @@ dependencies = [
 [[package]]
 name = "tracing-actix-web"
-version = "0.7.10"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa069bd1503dd526ee793bb3fce408895136c95fc86d2edb2acf1c646d7f0684"
+checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
 dependencies = [
  "actix-web",
  "mutually_exclusive_features",
@@ -6047,7 +6052,7 @@ dependencies = [
 [[package]]
 name = "xtask"
-version = "1.9.0"
+version = "1.8.0"
 dependencies = [
  "anyhow",
  "build-info",

View File

@@ -22,7 +22,7 @@ members = [
 ]
 [workspace.package]
-version = "1.9.0"
+version = "1.8.0"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",

View File

@@ -25,7 +25,7 @@
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
-[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
+Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
 <p align="center" name="demo">
 <a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -39,8 +39,8 @@
 🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
 ## ✨ Features
-- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
-- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
+- **Search-as-you-type:** find search results in less than 50 milliseconds
 - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -55,15 +55,15 @@
 ## 📖 Documentation
-You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
+You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
 ## 🚀 Getting started
 For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
-## 🌍 Supercharge your Meilisearch experience
-Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
+## Supercharge your Meilisearch experience
+Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
 ## 🧰 SDKs & integration tools
@@ -85,13 +85,13 @@ Finally, for more in-depth information, refer to our articles explaining fundame
 Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
-To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
+To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
 If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
 ## 📫 Get in touch!
-Meilisearch is a search engine created by [Meili]([https://www.welcometothejungle.com/en/companies/meilisearch](https://www.meilisearch.com/careers)), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
+Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
 🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.

View File

@@ -11,7 +11,7 @@ edition.workspace = true
 license.workspace = true
 [dependencies]
-actix-web = { version = "4.6.0", default-features = false }
+actix-web = { version = "4.5.1", default-features = false }
 anyhow = "1.0.79"
 convert_case = "0.6.0"
 csv = "1.3.0"
@@ -30,12 +30,7 @@ serde_json = "1.0.111"
 tar = "0.4.40"
 tempfile = "3.9.0"
 thiserror = "1.0.56"
-time = { version = "0.3.31", features = [
-    "serde-well-known",
-    "formatting",
-    "parsing",
-    "macros",
-] }
+time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tokio = "1.35"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }

View File

@@ -189,6 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
 merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
-merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
-merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
 merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@@ -241,8 +241,6 @@ InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
-InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
-InvalidSimilarRankingScoreThreshold , InvalidRequest , BAD_REQUEST ;
 InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
 InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
@@ -507,21 +505,6 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
     }
 }
-impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`."
-        )
-    }
-}
-impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f)
-    }
-}
 #[macro_export]
 macro_rules! internal_error {
     ($target:ty : $($other:path), *) => {

View File

@@ -14,20 +14,20 @@ default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [ actix-http = { version = "3.6.0", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls-0_21", "rustls-0_21",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [ actix-web = { version = "4.5.1", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"cookies", "cookies",
"rustls-0_21", "rustls-0_21",
] } ] }
actix-web-static-files = { version = "4.0.1", optional = true } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] } anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5" async-stream = "0.3.5"
async-trait = "0.1.77" async-trait = "0.1.77"
@@ -105,13 +105,13 @@ url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40" tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10" tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" } build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies] [dev-dependencies]
actix-rt = "2.9.0" actix-rt = "2.9.0"
assert-json-diff = "2.0.2" assert-json-diff = "2.0.2"
brotli = "6.0.0" brotli = "3.4.0"
insta = "1.34.0" insta = "1.34.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.18"
maplit = "1.0.2" maplit = "1.0.2"

View File

@@ -648,7 +648,6 @@ pub struct SearchAggregator {
     // scoring
     show_ranking_score: bool,
     show_ranking_score_details: bool,
-    ranking_score_threshold: bool,
 }
 impl SearchAggregator {
@@ -677,7 +676,6 @@ impl SearchAggregator {
             matching_strategy,
             attributes_to_search_on,
             hybrid,
-            ranking_score_threshold,
         } = query;
         let mut ret = Self::default();
@@ -750,7 +748,6 @@ impl SearchAggregator {
         ret.show_ranking_score = *show_ranking_score;
         ret.show_ranking_score_details = *show_ranking_score_details;
-        ret.ranking_score_threshold = ranking_score_threshold.is_some();
         if let Some(hybrid) = hybrid {
             ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@@ -824,7 +821,6 @@ impl SearchAggregator {
             hybrid,
             total_degraded,
             total_used_negative_operator,
-            ranking_score_threshold,
         } = other;
         if self.timestamp.is_none() {
@@ -908,7 +904,6 @@ impl SearchAggregator {
         // scoring
         self.show_ranking_score |= show_ranking_score;
         self.show_ranking_score_details |= show_ranking_score_details;
-        self.ranking_score_threshold |= ranking_score_threshold;
     }
     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -950,7 +945,6 @@ impl SearchAggregator {
             hybrid,
             total_degraded,
             total_used_negative_operator,
-            ranking_score_threshold,
         } = self;
         if total_received == 0 {
@@ -1021,7 +1015,6 @@ impl SearchAggregator {
             "scoring": {
                 "show_ranking_score": show_ranking_score,
                 "show_ranking_score_details": show_ranking_score_details,
-                "ranking_score_threshold": ranking_score_threshold,
             },
         });
@@ -1094,7 +1087,6 @@ impl MultiSearchAggregator {
             matching_strategy: _,
             attributes_to_search_on: _,
             hybrid: _,
-            ranking_score_threshold: _,
         } = query;
         index_uid.as_str()
@@ -1242,7 +1234,6 @@ impl FacetSearchAggregator {
             matching_strategy,
             attributes_to_search_on,
             hybrid,
-            ranking_score_threshold,
         } = query;
         let mut ret = Self::default();
@@ -1257,8 +1248,7 @@ impl FacetSearchAggregator {
             || filter.is_some()
             || *matching_strategy != MatchingStrategy::default()
             || attributes_to_search_on.is_some()
-            || hybrid.is_some()
-            || ranking_score_threshold.is_some();
+            || hybrid.is_some();
         ret
     }
@@ -1634,7 +1624,6 @@ pub struct SimilarAggregator {
     // scoring
     show_ranking_score: bool,
     show_ranking_score_details: bool,
-    ranking_score_threshold: bool,
 }
 impl SimilarAggregator {
@@ -1649,7 +1638,6 @@ impl SimilarAggregator {
             show_ranking_score,
             show_ranking_score_details,
             filter,
-            ranking_score_threshold,
         } = query;
         let mut ret = Self::default();
@@ -1687,7 +1675,6 @@ impl SimilarAggregator {
         ret.show_ranking_score = *show_ranking_score;
         ret.show_ranking_score_details = *show_ranking_score_details;
-        ret.ranking_score_threshold = ranking_score_threshold.is_some();
         ret.embedder = embedder.is_some();
@@ -1721,7 +1708,6 @@ impl SimilarAggregator {
             show_ranking_score,
             show_ranking_score_details,
             embedder,
-            ranking_score_threshold,
         } = other;
         if self.timestamp.is_none() {
@@ -1763,7 +1749,6 @@ impl SimilarAggregator {
         // scoring
         self.show_ranking_score |= show_ranking_score;
         self.show_ranking_score_details |= show_ranking_score_details;
-        self.ranking_score_threshold |= ranking_score_threshold;
     }
     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1784,7 +1769,6 @@ impl SimilarAggregator {
             show_ranking_score,
             show_ranking_score_details,
             embedder,
-            ranking_score_threshold,
         } = self;
         if total_received == 0 {
@@ -1824,7 +1808,6 @@ impl SimilarAggregator {
             "scoring": {
                 "show_ranking_score": show_ranking_score,
                 "show_ranking_score_details": show_ranking_score_details,
-                "ranking_score_threshold": ranking_score_threshold,
             },
         });
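Every aggregator hunk above follows the same pattern: the removed `ranking_score_threshold` was one more "was this feature used?" boolean, set per request with `is_some()` and folded into the running aggregate with `|=` before being reported. A minimal standalone sketch of that pattern, with made-up names rather than the real SearchAggregator fields:

#[derive(Default)]
struct FeatureUsage {
    total_received: usize,
    show_ranking_score: bool,
    ranking_score_threshold: bool,
}

impl FeatureUsage {
    // Build an aggregate from a single request (here reduced to two flags).
    fn from_request(show_score: bool, threshold: Option<f64>) -> Self {
        FeatureUsage {
            total_received: 1,
            show_ranking_score: show_score,
            ranking_score_threshold: threshold.is_some(),
        }
    }

    // Merge another aggregate into this one, as the real aggregators do with `|=`.
    fn aggregate(&mut self, other: FeatureUsage) {
        self.total_received += other.total_received;
        self.show_ranking_score |= other.show_ranking_score;
        self.ranking_score_threshold |= other.ranking_score_threshold;
    }
}

fn main() {
    let mut agg = FeatureUsage::default();
    agg.aggregate(FeatureUsage::from_request(true, None));
    agg.aggregate(FeatureUsage::from_request(false, Some(0.8)));
    assert!(agg.show_ranking_score && agg.ranking_score_threshold);
    println!("requests: {}", agg.total_received);
}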

View File

@@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{
-    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-    SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
+    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
     DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 use crate::search_queue::SearchQueue;
@@ -46,8 +46,6 @@ pub struct FacetSearchQuery {
     pub matching_strategy: MatchingStrategy,
     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
     pub attributes_to_search_on: Option<Vec<String>>,
-    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
-    pub ranking_score_threshold: Option<RankingScoreThreshold>,
 }
 pub async fn search(
@@ -105,7 +103,6 @@ impl From<FacetSearchQuery> for SearchQuery {
             matching_strategy,
             attributes_to_search_on,
             hybrid,
-            ranking_score_threshold,
         } = value;
         SearchQuery {
@@ -131,7 +128,6 @@ impl From<FacetSearchQuery> for SearchQuery {
             vector,
             attributes_to_search_on,
             hybrid,
-            ranking_score_threshold,
         }
     }
 }

View File

@@ -19,10 +19,9 @@ use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::search::{
-    add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-    SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
+    SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;
@@ -83,21 +82,6 @@ pub struct SearchQueryGet {
     pub hybrid_embedder: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
     pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
-    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
-}
-#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
-#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
-pub struct RankingScoreThresholdGet(RankingScoreThreshold);
-impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
-    type Error = InvalidSearchRankingScoreThreshold;
-    fn try_from(s: String) -> Result<Self, Self::Error> {
-        let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
-        Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
-    }
-}
 }
 #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@@ -168,7 +152,6 @@ impl From<SearchQueryGet> for SearchQuery {
             matching_strategy: other.matching_strategy,
             attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
             hybrid,
-            ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
         }
     }
 }

View File

@@ -6,8 +6,8 @@ use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::{
     InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
-    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
-    InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
+    InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
+    InvalidSimilarShowRankingScoreDetails,
 };
 use meilisearch_types::error::{ErrorCode as _, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
@@ -21,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
-    SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -42,7 +42,9 @@ pub async fn similar_get(
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    let query = params.0.try_into()?;
+    let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
+        ResponseError::from_msg(code.to_string(), code.error_code())
+    })?;
     let mut aggregate = SimilarAggregator::from_query(&query, &req);
@@ -128,27 +130,12 @@ pub struct SimilarQueryGet {
     show_ranking_score: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
     show_ranking_score_details: Param<bool>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
-    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
     #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
     pub embedder: Option<String>,
 }
-#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
-#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
-pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
-impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
-    type Error = InvalidSimilarRankingScoreThreshold;
-    fn try_from(s: String) -> Result<Self, Self::Error> {
-        let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
-        Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
-    }
-}
 impl TryFrom<SimilarQueryGet> for SimilarQuery {
-    type Error = ResponseError;
+    type Error = InvalidSimilarId;
     fn try_from(
         SimilarQueryGet {
@@ -160,7 +147,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             show_ranking_score,
             show_ranking_score_details,
             embedder,
-            ranking_score_threshold,
         }: SimilarQueryGet,
     ) -> Result<Self, Self::Error> {
         let filter = match filter {
@@ -172,9 +158,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
         };
         Ok(SimilarQuery {
-            id: id.0.try_into().map_err(|code: InvalidSimilarId| {
-                ResponseError::from_msg(code.to_string(), code.error_code())
-            })?,
+            id: id.0.try_into()?,
             offset: offset.0,
             limit: limit.0,
             filter,
@@ -182,7 +166,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
             show_ranking_score: show_ranking_score.0,
             show_ranking_score_details: show_ranking_score_details.0,
-            ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
         })
     }
 }
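The route change above mostly moves where the typed deserialization error becomes an HTTP `ResponseError`: with `type Error = ResponseError` the conversion maps `InvalidSimilarId` itself, while with `type Error = InvalidSimilarId` the handler does the mapping at the call site. A toy sketch of the same trade-off, using hypothetical types rather than Meilisearch's:

#[derive(Debug)]
struct InvalidId;

impl std::fmt::Display for InvalidId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "the provided id is invalid")
    }
}

struct ResponseErrorLike {
    msg: String,
    code: &'static str,
}

impl ResponseErrorLike {
    fn from_msg(msg: String, code: &'static str) -> Self {
        ResponseErrorLike { msg, code }
    }
}

// Variant A: the conversion returns the typed error and the caller decides how
// to surface it (this mirrors keeping `type Error = InvalidId`).
fn parse_id(raw: &str) -> Result<u64, InvalidId> {
    raw.parse().map_err(|_| InvalidId)
}

// Variant B: the conversion itself produces the HTTP-facing error
// (this mirrors `type Error = ResponseError`).
fn parse_id_response(raw: &str) -> Result<u64, ResponseErrorLike> {
    parse_id(raw).map_err(|e| ResponseErrorLike::from_msg(e.to_string(), "invalid_similar_id"))
}

fn main() {
    assert!(parse_id("42").is_ok());
    let err = parse_id_response("not-a-number").unwrap_err();
    println!("{} ({})", err.msg, err.code);
}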

View File

@@ -87,44 +87,6 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy, pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>, pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchRankingScoreThreshold)
} else {
Ok(RankingScoreThreshold(f))
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSimilarRankingScoreThreshold)
} else {
Ok(Self(f))
}
}
} }
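For reference, the removed impls above only validate that a threshold lies inside the closed interval [0.0, 1.0]; a minimal standalone sketch of an equivalent check, written with the `RangeInclusive::contains` form the in-code comment refers to (illustrative only, not code from this diff):

fn is_valid_threshold(f: f64) -> bool {
    // behaves exactly like the `f > 1.0 || f < 0.0` rejection above
    (0.0..=1.0).contains(&f)
}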
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum. // Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@@ -155,7 +117,6 @@ impl fmt::Debug for SearchQuery {
crop_marker, crop_marker,
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
ranking_score_threshold,
} = self; } = self;
let mut debug = f.debug_struct("SearchQuery"); let mut debug = f.debug_struct("SearchQuery");
@@ -227,9 +188,6 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag); debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag); debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker); debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish() debug.finish()
} }
@@ -398,8 +356,6 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy, pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>, pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
} }
impl SearchQueryWithIndex { impl SearchQueryWithIndex {
@@ -428,7 +384,6 @@ impl SearchQueryWithIndex {
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
} = self; } = self;
( (
index_uid, index_uid,
@@ -455,7 +410,6 @@ impl SearchQueryWithIndex {
matching_strategy, matching_strategy,
attributes_to_search_on, attributes_to_search_on,
hybrid, hybrid,
ranking_score_threshold,
// do not use ..Default::default() here, // do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
}, },
@@ -482,8 +436,6 @@ pub struct SimilarQuery {
pub show_ranking_score: bool, pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)] #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool, pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
} }
#[derive(Debug, Clone, PartialEq, Deserr)] #[derive(Debug, Clone, PartialEq, Deserr)]
@@ -525,8 +477,6 @@ pub enum MatchingStrategy {
Last, Last,
/// All query words are mandatory /// All query words are mandatory
All, All,
/// Remove query words from the most frequent to the least
Frequency,
} }
impl Default for MatchingStrategy { impl Default for MatchingStrategy {
@@ -540,7 +490,6 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
match other { match other {
MatchingStrategy::Last => Self::Last, MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All, MatchingStrategy::All => Self::All,
MatchingStrategy::Frequency => Self::Frequency,
} }
} }
} }
@@ -712,9 +661,6 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn); let mut search = index.search(rtxn);
search.time_budget(time_budget); search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
match search_kind { match search_kind {
SearchKind::KeywordOnly => { SearchKind::KeywordOnly => {
@@ -756,16 +702,11 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination); search.exhaustive_number_hits(is_finite_pagination);
-        search.scoring_strategy(
-            if query.show_ranking_score
-                || query.show_ranking_score_details
-                || query.ranking_score_threshold.is_some()
-            {
-                ScoringStrategy::Detailed
-            } else {
-                ScoringStrategy::Skip
-            },
-        );
+        search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
+            ScoringStrategy::Detailed
+        } else {
+            ScoringStrategy::Skip
+        });
// compute the offset on the limit depending on the pagination mode. // compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination { let (offset, limit) = if is_finite_pagination {
@@ -843,6 +784,10 @@ pub fn perform_search(
    let SearchQuery {
        q,
+        vector: _,
+        hybrid: _,
+        // already computed from prepare_search
+        offset: _,
        limit,
        page,
        hits_per_page,
@@ -853,19 +798,14 @@ pub fn perform_search(
        show_matches_position,
        show_ranking_score,
        show_ranking_score_details,
+        filter: _,
        sort,
        facets,
        highlight_pre_tag,
        highlight_post_tag,
        crop_marker,
-        // already used in prepare_search
-        vector: _,
-        hybrid: _,
-        offset: _,
-        ranking_score_threshold: _,
        matching_strategy: _,
        attributes_to_search_on: _,
-        filter: _,
    } = query;
let format = AttributesFormat { let format = AttributesFormat {
@@ -1127,7 +1067,6 @@ pub fn perform_similar(
attributes_to_retrieve, attributes_to_retrieve,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
ranking_score_threshold,
} = query; } = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here, // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@@ -1151,10 +1090,6 @@ pub fn perform_similar(
} }
} }
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult { let milli::SearchResult {
documents_ids, documents_ids,
matching_words: _, matching_words: _,

View File

@@ -321,40 +321,6 @@ async fn search_bad_facets() {
// Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings. // Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings.
} }
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn search_non_filterable_facets() { async fn search_non_filterable_facets() {
let server = Server::new().await; let server = Server::new().await;
@@ -539,7 +505,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{
-      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
+      "message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy", "code": "invalid_search_matching_strategy",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy" "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -561,7 +527,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{
-      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
+      "message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy", "code": "invalid_search_matching_strategy",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy" "link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"

View File

@@ -117,69 +117,3 @@ async fn geo_bounding_box_with_string_and_number() {
) )
.await; .await;
} }
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the document with the second one first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@@ -1,128 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "1",
},
{
"title": "Captain Planet",
"id": "2",
},
{
"title": "Captain Marvel",
"id": "3",
},
{
"title": "a Captain Marvel ersatz",
"id": "4"
},
{
"title": "He's not part of the Marvel Cinematic Universe",
"id": "5"
},
{
"title": "a Shazam ersatz, but better than Captain Planet",
"id": "6"
},
{
"title": "Capitain CAAAAAVEEERNE!!!!",
"id": "7"
}
])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
#[actix_rt::test]
async fn search_with_typo() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;
index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
#[actix_rt::test]
async fn search_with_unknown_word() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @"[]");
})
.await;
index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}

View File

@@ -7,7 +7,6 @@ mod facet_search;
mod formatted; mod formatted;
mod geo; mod geo;
mod hybrid; mod hybrid;
mod matching_strategy;
mod multi; mod multi;
mod pagination; mod pagination;
mod restrict_searchable; mod restrict_searchable;
@@ -48,31 +47,6 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
@@ -985,213 +959,6 @@ async fn test_score_details() {
.await; .await;
} }
#[actix_rt::test]
async fn test_score() {
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": "Badman the dark knight returns 1",
"showRankingScore": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.9746605609456898
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.8055252965383685
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.16666666666666666
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.07702020202020202
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.07702020202020202
}
]
"###);
},
)
.await;
}
#[actix_rt::test]
async fn test_score_threshold() {
let query = "Badman dark returns 1";
let server = Server::new().await;
let index = server.index("test");
let documents = SCORE_DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
},
{
"title": "Batman Returns",
"id": "C",
"_rankingScore": 0.11553030303030302
},
{
"title": "Batman",
"id": "D",
"_rankingScore": 0.11553030303030302
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.2
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
},
{
"title": "Badman",
"id": "E",
"_rankingScore": 0.25
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.5
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
"_rankingScore": 0.6685627880184332
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 0.8
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
"_rankingScore": 0.93430081300813
}
]
"###);
},
)
.await;
index
.search(
json!({
"q": query,
"showRankingScore": true,
"rankingScoreThreshold": 1.0
}),
|response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###);
// nobody is perfect
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn test_degraded_score_details() { async fn test_degraded_score_details() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -87,68 +87,6 @@ async fn similar_bad_id() {
"###); "###);
} }
#[actix_rt::test]
async fn similar_bad_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test]
async fn similar_invalid_ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
server.set_features(json!({"vectorStore": true})).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_similar_ranking_score_threshold",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold"
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn similar_invalid_id() { async fn similar_invalid_id() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -194,235 +194,6 @@ async fn basic() {
.await; .await;
} }
#[actix_rt::test]
async fn ranking_score_threshold() {
let server = Server::new().await;
let index = server.index("test");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
"filterableAttributes": ["title"]}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
},
{
"title": "Shazam!",
"release_year": 2019,
"id": "287947",
"_vectors": {
"manual": [
0.8,
0.4,
-0.5
]
},
"_rankingScore": 0.1662663221359253
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
},
{
"title": "How to Train Your Dragon: The Hidden World",
"release_year": 2019,
"id": "166428",
"_vectors": {
"manual": [
0.7,
0.7,
-0.4
]
},
"_rankingScore": 0.2819308042526245
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
},
{
"title": "Captain Marvel",
"release_year": 2019,
"id": "299537",
"_vectors": {
"manual": [
0.6,
0.8,
-0.2
]
},
"_rankingScore": 0.39060014486312866
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
"release_year": 2019,
"id": "522681",
"_vectors": {
"manual": [
0.1,
0.6,
0.8
]
},
"_rankingScore": 0.890957772731781
}
]
"###);
},
)
.await;
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn filter() { async fn filter() {
let server = Server::new().await; let server = Server::new().await;

View File

@@ -31,7 +31,6 @@ macro_rules! verify_snapshot {
} }
#[actix_rt::test] #[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() { async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap(); let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap(); let snapshot_dir = tempfile::tempdir().unwrap();

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0" bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] } bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
-charabia = { version = "0.8.11", default-features = false }
+charabia = { version = "0.8.10", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11" crossbeam-channel = "0.5.11"
deserr = "0.6.1" deserr = "0.6.1"

View File

@@ -66,7 +66,6 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut DefaultSearchLogger, &mut DefaultSearchLogger,
logger, logger,
TimeBudget::max(), TimeBudget::max(),
None,
)?; )?;
if let Some((logger, dir)) = detailed_logger { if let Some((logger, dir)) = detailed_logger {
logger.finish(&mut ctx, Path::new(dir))?; logger.finish(&mut ctx, Path::new(dir))?;

View File

@@ -169,7 +169,6 @@ impl<'a> Search<'a> {
index: self.index, index: self.index,
semantic: self.semantic.clone(), semantic: self.semantic.clone(),
time_budget: self.time_budget.clone(), time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
}; };
let semantic = search.semantic.take(); let semantic = search.semantic.take();

View File

@@ -50,7 +50,6 @@ pub struct Search<'a> {
index: &'a Index, index: &'a Index,
semantic: Option<SemanticSearch>, semantic: Option<SemanticSearch>,
time_budget: TimeBudget, time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
} }
impl<'a> Search<'a> { impl<'a> Search<'a> {
@@ -71,7 +70,6 @@ impl<'a> Search<'a> {
index, index,
semantic: None, semantic: None,
time_budget: TimeBudget::max(), time_budget: TimeBudget::max(),
ranking_score_threshold: None,
} }
} }
@@ -148,11 +146,6 @@ impl<'a> Search<'a> {
self self
} }
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search { if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?; let ctx = SearchContext::new(self.index, self.rtxn)?;
@@ -191,7 +184,6 @@ impl<'a> Search<'a> {
embedder_name, embedder_name,
embedder, embedder,
self.time_budget.clone(), self.time_budget.clone(),
self.ranking_score_threshold,
)? )?
} }
_ => execute_search( _ => execute_search(
@@ -209,7 +201,6 @@ impl<'a> Search<'a> {
&mut DefaultSearchLogger, &mut DefaultSearchLogger,
&mut DefaultSearchLogger, &mut DefaultSearchLogger,
self.time_budget.clone(), self.time_budget.clone(),
self.ranking_score_threshold,
)?, )?,
}; };
@@ -248,7 +239,6 @@ impl fmt::Debug for Search<'_> {
index: _, index: _,
semantic, semantic,
time_budget, time_budget,
ranking_score_threshold,
} = self; } = self;
f.debug_struct("Search") f.debug_struct("Search")
.field("query", query) .field("query", query)
@@ -267,7 +257,6 @@ impl fmt::Debug for Search<'_> {
&semantic.as_ref().map(|semantic| &semantic.embedder_name), &semantic.as_ref().map(|semantic| &semantic.embedder_name),
) )
.field("time_budget", time_budget) .field("time_budget", time_budget)
.field("ranking_score_threshold", ranking_score_threshold)
.finish() .finish()
} }
} }
@@ -288,8 +277,6 @@ pub enum TermsMatchingStrategy {
Last, Last,
// all words are mandatory // all words are mandatory
All, All,
// remove more frequent word first
Frequency,
} }
impl Default for TermsMatchingStrategy { impl Default for TermsMatchingStrategy {

View File

@@ -28,7 +28,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
scoring_strategy: ScoringStrategy, scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>, logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget, time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<BucketSortOutput> { ) -> Result<BucketSortOutput> {
logger.initial_query(query); logger.initial_query(query);
logger.ranking_rules(&ranking_rules); logger.ranking_rules(&ranking_rules);
@@ -165,19 +164,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
loop { loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped); ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
maybe_add_to_results!(bucket); maybe_add_to_results!(bucket);
ranking_rule_scores.pop(); ranking_rule_scores.pop();
if cur_ranking_rule_index == 0 { if cur_ranking_rule_index == 0 {
@@ -233,18 +220,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
debug_assert!( debug_assert!(
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates) ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
); );
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1 if cur_ranking_rule_index == ranking_rules_len - 1
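A rough sketch of the pruning that the removed lines performed, assuming a helper that folds the per-rule scores accumulated so far into a global value in [0.0, 1.0] (the helper below is a stand-in, not the actual `ScoreDetails::global_score`):

// Stand-in for ScoreDetails::global_score: folds partial rule scores into one value.
fn current_global_score(rule_scores: &[f64]) -> f64 {
    rule_scores.iter().product()
}

// If the partial score already falls under the threshold, the whole bucket can be
// dropped from the candidate set instead of being pushed into the results.
fn should_skip_bucket(rule_scores: &[f64], threshold: Option<f64>) -> bool {
    threshold.map_or(false, |t| current_global_score(rule_scores) < t)
}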

View File

@@ -164,21 +164,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
} }
costs costs
} }
TermsMatchingStrategy::Frequency => {
let removal_order =
query_graph.removal_order_for_terms_matching_strategy_frequency(ctx)?;
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only words uses termsmatchingstrategy at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
}
costs
}
TermsMatchingStrategy::All => query_graph.nodes.map(|_| None), TermsMatchingStrategy::All => query_graph.nodes.map(|_| None),
} }
} else { } else {

View File

@@ -523,7 +523,6 @@ mod tests {
&mut crate::DefaultSearchLogger, &mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger, &mut crate::DefaultSearchLogger,
TimeBudget::max(), TimeBudget::max(),
None,
) )
.unwrap(); .unwrap();

View File

@@ -197,11 +197,6 @@ fn resolve_maximally_reduced_query_graph(
.iter() .iter()
.flat_map(|x| x.iter()) .flat_map(|x| x.iter())
.collect(), .collect(),
TermsMatchingStrategy::Frequency => query_graph
.removal_order_for_terms_matching_strategy_frequency(ctx)?
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::All => vec![], TermsMatchingStrategy::All => vec![],
}; };
graph.remove_nodes_keep_edges(&nodes_to_remove); graph.remove_nodes_keep_edges(&nodes_to_remove);
@@ -573,7 +568,6 @@ pub fn execute_vector_search(
embedder_name: &str, embedder_name: &str,
embedder: &Embedder, embedder: &Embedder,
time_budget: TimeBudget, time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> { ) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?; check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -603,7 +597,6 @@ pub fn execute_vector_search(
scoring_strategy, scoring_strategy,
placeholder_search_logger, placeholder_search_logger,
time_budget, time_budget,
ranking_score_threshold,
)?; )?;
Ok(PartialSearchResult { Ok(PartialSearchResult {
@@ -633,7 +626,6 @@ pub fn execute_search(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>, query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
time_budget: TimeBudget, time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> { ) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?; check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -722,7 +714,6 @@ pub fn execute_search(
scoring_strategy, scoring_strategy,
query_graph_logger, query_graph_logger,
time_budget, time_budget,
ranking_score_threshold,
)? )?
} else { } else {
let ranking_rules = let ranking_rules =
@@ -737,7 +728,6 @@ pub fn execute_search(
scoring_strategy, scoring_strategy,
placeholder_search_logger, placeholder_search_logger,
time_budget, time_budget,
ranking_score_threshold,
)? )?
}; };

View File

@@ -1,9 +1,8 @@
-use std::cmp::{Ordering, Reverse};
+use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};
use fxhash::{FxHashMap, FxHasher};
-use roaring::RoaringBitmap;
use super::interner::{FixedSizeInterner, Interned};
use super::query_term::{
@@ -12,7 +11,6 @@ use super::query_term::{
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use crate::search::new::interner::Interner;
-use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::Result;
/// A node of the [`QueryGraph`]. /// A node of the [`QueryGraph`].
@@ -292,49 +290,6 @@ impl QueryGraph {
} }
} }
pub fn removal_order_for_terms_matching_strategy_frequency(
&self,
ctx: &mut SearchContext,
) -> Result<Vec<SmallBitmap<QueryNode>>> {
// lookup frequency for each term
let mut term_with_frequency: Vec<(u8, u64)> = {
let mut term_docids: BTreeMap<u8, RoaringBitmap> = Default::default();
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(t) => {
let docids = compute_query_term_subset_docids(ctx, &t.term_subset)?;
for id in t.term_ids.clone() {
term_docids
.entry(id)
.and_modify(|curr| *curr |= &docids)
.or_insert_with(|| docids.clone());
}
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
term_docids
.into_iter()
.map(|(idx, docids)| match docids.len() {
0 => (idx, u64::max_value()),
frequency => (idx, frequency),
})
.collect()
};
term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
let mut term_weight = BTreeMap::new();
let mut weight: u16 = 1;
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
weight += 1;
}
}
let cost_of_term_idx = move |term_idx: u8| *term_weight.get(&term_idx).unwrap();
Ok(self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx))
}
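The removed `_frequency` helper above weights terms by how many documents they occur in: terms are sorted by decreasing frequency, and terms with the same frequency share a removal weight, so the most frequent words are dropped first. A condensed, self-contained sketch of that weighting step (illustrative only, not the actual method signature):

use std::collections::BTreeMap;

// Illustrative: map (term id, document frequency) pairs to removal weights,
// most frequent terms first, equal frequencies sharing the same weight.
fn removal_weights(mut term_freqs: Vec<(u8, u64)>) -> BTreeMap<u8, u16> {
    term_freqs.sort_by_key(|(_, freq)| std::cmp::Reverse(*freq));
    let mut weights = BTreeMap::new();
    let mut weight: u16 = 1;
    let mut it = term_freqs.into_iter().peekable();
    while let Some((term, freq)) = it.next() {
        weights.insert(term, weight);
        if it.peek().map_or(false, |(_, next)| freq != *next) {
            weight += 1;
        }
    }
    weights
}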
pub fn removal_order_for_terms_matching_strategy_last( pub fn removal_order_for_terms_matching_strategy_last(
&self, &self,
ctx: &SearchContext, ctx: &SearchContext,
@@ -360,19 +315,10 @@ impl QueryGraph {
        if first_term_idx >= last_term_idx {
            return vec![];
        }
        let cost_of_term_idx = |term_idx: u8| {
            let rank = 1 + last_term_idx - term_idx;
            rank as u16
        };
-        self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx)
-    }
-
-    pub fn removal_order_for_terms_matching_strategy(
-        &self,
-        ctx: &SearchContext,
-        order: impl Fn(u8) -> u16,
-    ) -> Vec<SmallBitmap<QueryNode>> {
        let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
        let mut at_least_one_mandatory_term = false;
        for (node_id, node) in self.nodes.iter() {
@@ -383,7 +329,7 @@ impl QueryGraph {
            }
            let mut cost = 0;
            for id in t.term_ids.clone() {
-                cost = std::cmp::max(cost, order(id));
+                cost = std::cmp::max(cost, cost_of_term_idx(id));
            }
            nodes_to_remove
                .entry(cost)

View File

@@ -17,7 +17,6 @@ pub struct Similar<'a> {
index: &'a Index, index: &'a Index,
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
} }
impl<'a> Similar<'a> { impl<'a> Similar<'a> {
@@ -30,17 +29,7 @@ impl<'a> Similar<'a> {
        embedder_name: String,
        embedder: Arc<Embedder>,
    ) -> Self {
-        Self {
-            id,
-            filter: None,
-            offset,
-            limit,
-            rtxn,
-            index,
-            embedder_name,
-            embedder,
-            ranking_score_threshold: None,
-        }
+        Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
    }
pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self { pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
@@ -48,18 +37,8 @@ impl<'a> Similar<'a> {
        self
    }

-    pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Self {
-        self.ranking_score_threshold = Some(ranking_score_threshold);
-        self
-    }
-
    pub fn execute(&self) -> Result<SearchResult> {
-        let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
-        // we never want to receive the docid
-        universe.remove(self.id);
-        let universe = universe;
+        let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
        let embedder_index =
            self.index
@@ -98,8 +77,6 @@ impl<'a> Similar<'a> {
        let mut documents_seen = RoaringBitmap::new();
        documents_seen.insert(self.id);
-        let mut candidates = universe;
        for (docid, distance) in results
            .into_iter()
            // skip documents we've already seen & mark that we saw the current document
@@ -108,6 +85,8 @@ impl<'a> Similar<'a> {
            // take **after** filter and skip so that we get exactly limit elements if available
            .take(self.limit)
        {
+            documents_ids.push(docid);
            let score = 1.0 - distance;
            let score = self
                .embedder
@@ -115,28 +94,14 @@ impl<'a> Similar<'a> {
                .map(|distribution| distribution.shift(score))
                .unwrap_or(score);
-            let score_details =
-                vec![ScoreDetails::Vector(score_details::Vector { similarity: Some(score) })];
-            let score = ScoreDetails::global_score(score_details.iter());
-            if let Some(ranking_score_threshold) = &self.ranking_score_threshold {
-                if score < *ranking_score_threshold {
-                    // this document is no longer a candidate
-                    candidates.remove(docid);
-                    // any document after this one is no longer a candidate either, so restrict the set to documents already seen.
-                    candidates &= documents_seen;
-                    break;
-                }
-            }
-            documents_ids.push(docid);
-            document_scores.push(score_details);
+            let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });
+            document_scores.push(vec![score]);
        }

        Ok(SearchResult {
            matching_words: Default::default(),
-            candidates,
+            candidates: universe,
            documents_ids,
            document_scores,
            degraded: false,
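Since the similarity results are traversed in decreasing score order, the removed threshold handling could stop at the first hit below the limit and drop everything after it from the candidate set; a tiny sketch of that early-exit idea (types are illustrative, not the milli API):

// Illustrative: keep (docid, score) hits only while the score stays at or above the threshold.
fn keep_above_threshold(hits: Vec<(u32, f64)>, threshold: f64) -> Vec<(u32, f64)> {
    hits.into_iter().take_while(|(_, score)| *score >= threshold).collect()
}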

View File

@@ -40,26 +40,11 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
    operation: DelAddOperation,
    buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
-    into_del_add_obkv_conditional_operation(reader, buffer, |_| operation)
-}
-
-/// Akin to the [into_del_add_obkv] function but lets you
-/// conditionally define the `DelAdd` variant based on the obkv key.
-pub fn into_del_add_obkv_conditional_operation<K, F>(
-    reader: obkv::KvReader<K>,
-    buffer: &mut Vec<u8>,
-    operation: F,
-) -> std::io::Result<()>
-where
-    K: obkv::Key + PartialOrd,
-    F: Fn(K) -> DelAddOperation,
-{
    let mut writer = obkv::KvWriter::new(buffer);
    let mut value_buffer = Vec::new();
    for (key, value) in reader.iter() {
        value_buffer.clear();
        let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
-        let operation = operation(key);
        if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
            value_writer.insert(DelAdd::Deletion, value)?;
        }

View File

@@ -1,5 +1,5 @@
use std::borrow::Cow; use std::borrow::Cow;
-use std::collections::{BTreeMap, BTreeSet};
+use std::collections::BTreeMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::fs::File; use std::fs::File;
use std::io::{self, BufReader}; use std::io::{self, BufReader};
@@ -9,7 +9,7 @@ use std::result::Result as StdResult;
use bytemuck::bytes_of; use bytemuck::bytes_of;
use grenad::Sorter; use grenad::Sorter;
use heed::BytesEncode; use heed::BytesEncode;
-use itertools::{merge_join_by, EitherOrBoth};
+use itertools::EitherOrBoth;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde_json::{from_slice, Value}; use serde_json::{from_slice, Value};
@@ -18,7 +18,7 @@ use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters}; use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError; use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes; use crate::facet::value_encoding::f64_into_bytes;
-use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
+use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH}; use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
@@ -45,6 +45,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
geo_fields_ids: Option<(FieldId, FieldId)>,
) -> Result<ExtractedFacetValues> { ) -> Result<ExtractedFacetValues> {
let max_memory = indexer.max_memory_by_thread(); let max_memory = indexer.max_memory_by_thread();
@@ -75,181 +76,143 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut numbers_key_buffer = Vec::new(); let mut numbers_key_buffer = Vec::new();
let mut strings_key_buffer = Vec::new(); let mut strings_key_buffer = Vec::new();
let old_faceted_fids: BTreeSet<_> = let mut cursor = obkv_documents.into_cursor()?;
settings_diff.old.faceted_fields_ids.iter().copied().collect(); while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let new_faceted_fids: BTreeSet<_> = let obkv = obkv::KvReader::new(value);
settings_diff.new.faceted_fields_ids.iter().copied().collect();
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids { for (field_id, field_bytes) in obkv.iter() {
let mut cursor = obkv_documents.into_cursor()?; let delete_faceted = settings_diff.old.faceted_fields_ids.contains(&field_id);
while let Some((docid_bytes, value)) = cursor.move_on_next()? { let add_faceted = settings_diff.new.faceted_fields_ids.contains(&field_id);
let obkv = obkv::KvReader::new(value); if delete_faceted || add_faceted {
let get_document_json_value = move |field_id, side| { numbers_key_buffer.clear();
obkv.get(field_id) strings_key_buffer.clear();
.map(KvReaderDelAdd::new)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()
.map_err(InternalError::SerdeJson)
};
// iterate over the faceted fields instead of over the whole document.
for eob in
merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| {
old.cmp(new)
})
{
let (field_id, del_value, add_value) = match eob {
EitherOrBoth::Left(&field_id) => {
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?;
// deletion only // Set key to the field_id
(field_id, del_value, None) // Note: this encoding is consistent with FieldIdCodec
} numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
EitherOrBoth::Right(&field_id) => { strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let add_value = get_document_json_value(field_id, DelAdd::Addition)?;
// addition only let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
(field_id, None, add_value) let document = DocumentId::from_be_bytes(document);
}
EitherOrBoth::Both(&field_id, _) => {
// during settings update, recompute the changing settings only.
if settings_diff.settings_update_only {
continue;
}
let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; // For the other extraction tasks, prefix the key with the field_id and the document_id
let add_value = get_document_json_value(field_id, DelAdd::Addition)?; numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
(field_id, del_value, add_value) let del_add_obkv = obkv::KvReader::new(field_bytes);
} let del_value = match del_add_obkv.get(DelAdd::Deletion).filter(|_| delete_faceted)
{
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition).filter(|_| add_faceted) {
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
}; };
if del_value.is_some() || add_value.is_some() { // We insert the document id on the Del and the Add side if the field exists.
numbers_key_buffer.clear(); let (ref mut del_exists, ref mut add_exists) =
strings_key_buffer.clear(); facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
// Set key to the field_id if del_value.is_some() {
// Note: this encoding is consistent with FieldIdCodec del_exists.insert(document);
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes()); }
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); if add_value.is_some() {
add_exists.insert(document);
}
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); let geo_support =
let document = DocumentId::from_be_bytes(document); geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, geo_support));
// For the other extraction tasks, prefix the key with the field_id and the document_id // Those closures are just here to simplify things a bit.
numbers_key_buffer.extend_from_slice(docid_bytes); let mut insert_numbers_diff = |del_numbers, add_numbers| {
strings_key_buffer.extend_from_slice(docid_bytes); insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
// We insert the document id on the Del and the Add side if the field exists. match (del_filterable_values, add_filterable_values) {
let (ref mut del_exists, ref mut add_exists) = (None, None) => (),
facet_exists_docids.entry(field_id).or_default(); (Some(del_filterable_values), None) => match del_filterable_values {
let (ref mut del_is_null, ref mut add_is_null) = Null => {
facet_is_null_docids.entry(field_id).or_default(); del_is_null.insert(document);
let (ref mut del_is_empty, ref mut add_is_empty) = }
facet_is_empty_docids.entry(field_id).or_default(); Empty => {
del_is_empty.insert(document);
if del_value.is_some() { }
del_exists.insert(document); Values { numbers, strings } => {
} insert_numbers_diff(numbers, vec![])?;
if add_value.is_some() { insert_strings_diff(strings, vec![])?;
add_exists.insert(document); }
} },
(None, Some(add_filterable_values)) => match add_filterable_values {
let del_geo_support = settings_diff Null => {
.old add_is_null.insert(document);
.geo_fields_ids }
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng); Empty => {
let add_geo_support = settings_diff add_is_empty.insert(document);
.new }
.geo_fields_ids Values { numbers, strings } => {
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng); insert_numbers_diff(vec![], numbers)?;
let del_filterable_values = insert_strings_diff(vec![], strings)?;
del_value.map(|value| extract_facet_values(&value, del_geo_support)); }
let add_filterable_values = },
add_value.map(|value| extract_facet_values(&value, add_geo_support)); (Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
// Those closures are just here to simplify things a bit. (Null, Null) | (Empty, Empty) => (),
let mut insert_numbers_diff = |del_numbers, add_numbers| { (Null, Empty) => {
insert_numbers_diff( del_is_null.insert(document);
&mut fid_docid_facet_numbers_sorter, add_is_empty.insert(document);
&mut numbers_key_buffer, }
del_numbers, (Empty, Null) => {
add_numbers, del_is_empty.insert(document);
) add_is_null.insert(document);
}; }
let mut insert_strings_diff = |del_strings, add_strings| { (Null, Values { numbers, strings }) => {
insert_strings_diff( insert_numbers_diff(vec![], numbers)?;
&mut fid_docid_facet_strings_sorter, insert_strings_diff(vec![], strings)?;
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document); del_is_null.insert(document);
} }
Empty => { (Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document); del_is_empty.insert(document);
} }
Values { numbers, strings } => { (Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?; insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?; insert_strings_diff(strings, vec![])?;
} }
}, (Values { numbers, strings }, Empty) => {
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document); add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
} }
Values { numbers, strings } => { (
insert_numbers_diff(vec![], numbers)?; Values { numbers: del_numbers, strings: del_strings },
insert_strings_diff(vec![], strings)?; Values { numbers: add_numbers, strings: add_strings },
} ) => {
}, insert_numbers_diff(del_numbers, add_numbers)?;
(Some(del_filterable_values), Some(add_filterable_values)) => { insert_strings_diff(del_strings, add_strings)?;
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
} }
} }
} }

View File

@@ -8,7 +8,6 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::GeoError;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::extract_finite_float_from_value;
-use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, InternalError, Result};

 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -19,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     primary_key_id: FieldId,
-    settings_diff: &InnerIndexSettingsDiff,
+    (lat_fid, lng_fid): (FieldId, FieldId),
 ) -> Result<grenad::Reader<BufReader<File>>> {
     let mut writer = create_writer(
         indexer.chunk_compression_type,
@@ -39,27 +38,47 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
             serde_json::from_slice(document_id).unwrap()
         };

-        // extract old version
-        let del_lat_lng =
-            extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
-        // extract new version
-        let add_lat_lng =
-            extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
-
-        if del_lat_lng != add_lat_lng {
-            let mut obkv = KvWriterDelAdd::memory();
-            if let Some([lat, lng]) = del_lat_lng {
-                #[allow(clippy::drop_non_drop)]
-                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                obkv.insert(DelAdd::Deletion, bytes)?;
-            }
-            if let Some([lat, lng]) = add_lat_lng {
-                #[allow(clippy::drop_non_drop)]
-                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                obkv.insert(DelAdd::Addition, bytes)?;
-            }
-            let bytes = obkv.into_inner()?;
-            writer.insert(docid_bytes, bytes)?;
-        }
-    }
+        // first we get the two fields
+        match (obkv.get(lat_fid), obkv.get(lng_fid)) {
+            (Some(lat), Some(lng)) => {
+                let deladd_lat_obkv = KvReaderDelAdd::new(lat);
+                let deladd_lng_obkv = KvReaderDelAdd::new(lng);
+
+                // then we extract the values
+                let del_lat_lng = deladd_lat_obkv
+                    .get(DelAdd::Deletion)
+                    .zip(deladd_lng_obkv.get(DelAdd::Deletion))
+                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
+                    .transpose()?;
+                let add_lat_lng = deladd_lat_obkv
+                    .get(DelAdd::Addition)
+                    .zip(deladd_lng_obkv.get(DelAdd::Addition))
+                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
+                    .transpose()?;
+
+                if del_lat_lng != add_lat_lng {
+                    let mut obkv = KvWriterDelAdd::memory();
+                    if let Some([lat, lng]) = del_lat_lng {
+                        #[allow(clippy::drop_non_drop)]
+                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                        obkv.insert(DelAdd::Deletion, bytes)?;
+                    }
+                    if let Some([lat, lng]) = add_lat_lng {
+                        #[allow(clippy::drop_non_drop)]
+                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                        obkv.insert(DelAdd::Addition, bytes)?;
+                    }
+                    let bytes = obkv.into_inner()?;
+                    writer.insert(docid_bytes, bytes)?;
+                }
+            }
+            (None, Some(_)) => {
+                return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+            }
+            (Some(_), None) => {
+                return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+            }
+            (None, None) => (),
+        }
+    }
@@ -67,37 +86,16 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 }

 /// Extract the finite floats lat and lng from two bytes slices.
-fn extract_lat_lng(
-    document: &obkv::KvReader<FieldId>,
-    settings: &InnerIndexSettings,
-    deladd: DelAdd,
-    document_id: impl Fn() -> Value,
-) -> Result<Option<[f64; 2]>> {
-    match settings.geo_fields_ids {
-        Some((lat_fid, lng_fid)) => {
-            let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
-            let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
-            let (lat, lng) = match (lat, lng) {
-                (Some(lat), Some(lng)) => (lat, lng),
-                (Some(_), None) => {
-                    return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
-                }
-                (None, Some(_)) => {
-                    return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
-                }
-                (None, None) => return Ok(None),
-            };
-            let lat = extract_finite_float_from_value(
-                serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
-            )
-            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
+    let lat = extract_finite_float_from_value(
+        serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
+    )
+    .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;

     let lng = extract_finite_float_from_value(
         serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
     )
     .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-            Ok(Some([lat, lng]))
-        }
-        None => Ok(None),
-    }
+
+    Ok([lat, lng])
 }
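
A side note on the 16-byte payload both versions build above: each of the deleted and added coordinate pairs is stored as two native-endian f64 values back to back. A small standalone sketch of that packing, written with plain std code rather than the concat_arrays! macro used in the real file:

// Pack a (lat, lng) pair into a 16-byte buffer: 8 native-endian bytes per f64.
fn pack_lat_lng(lat: f64, lng: f64) -> [u8; 16] {
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&lat.to_ne_bytes());
    bytes[8..].copy_from_slice(&lng.to_ne_bytes());
    bytes
}

// Recover the pair from the same layout.
fn unpack_lat_lng(bytes: [u8; 16]) -> (f64, f64) {
    let lat = f64::from_ne_bytes(bytes[..8].try_into().unwrap());
    let lng = f64::from_ne_bytes(bytes[8..].try_into().unwrap());
    (lat, lng)
}

fn main() {
    let packed = pack_lat_lng(48.8566, 2.3522);
    assert_eq!(unpack_lat_lng(packed), (48.8566, 2.3522));
    println!("{packed:?}");
}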

View File

@@ -26,8 +26,11 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
+    let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
+    let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
+
     // early return if the data shouldn't be deleted nor created.
-    if settings_diff.settings_update_only && !settings_diff.reindex_proximities() {
+    if !any_deletion && !any_addition {
         let writer = create_writer(
             indexer.chunk_compression_type,
             indexer.chunk_compression_level,
@@ -36,10 +39,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         return writer_into_reader(writer);
     }

-    let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
-    let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
-
     let max_memory = indexer.max_memory_by_thread();
     let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
         .map(|_| {
             create_sorter(

View File

@@ -43,6 +43,7 @@ pub(crate) fn data_from_obkv_documents(
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     primary_key_id: FieldId,
+    geo_fields_ids: Option<(FieldId, FieldId)>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<()> {
@@ -69,6 +70,7 @@
                     indexer,
                     lmdb_writer_sx.clone(),
                     primary_key_id,
+                    geo_fields_ids,
                     settings_diff.clone(),
                     max_positions_per_attributes,
                 )
@@ -291,6 +293,7 @@ fn send_and_extract_flattened_documents_data(
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     primary_key_id: FieldId,
+    geo_fields_ids: Option<(FieldId, FieldId)>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(
@@ -300,13 +303,12 @@ fn send_and_extract_flattened_documents_data(
     let flattened_documents_chunk =
         flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

-    if settings_diff.run_geo_indexing() {
+    if let Some(geo_fields_ids) = geo_fields_ids {
         let documents_chunk_cloned = flattened_documents_chunk.clone();
         let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
-        let settings_diff = settings_diff.clone();
         rayon::spawn(move || {
             let result =
-                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
+                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
             let _ = match result {
                 Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
                 Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -345,6 +347,7 @@ fn send_and_extract_flattened_documents_data(
                         flattened_documents_chunk.clone(),
                         indexer,
                         &settings_diff,
+                        geo_fields_ids,
                     )?;

                     // send fid_docid_facet_numbers_chunk to DB writer
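
The extractor above hands each chunk to a rayon task and reports the outcome back to the LMDB writer over a channel. Below is a reduced sketch of that fan-out pattern; it assumes the rayon crate and uses std::sync::mpsc with toy chunk and extract types in place of the real grenad chunks and crossbeam sender:

use std::sync::mpsc;

// Stand-in for an extraction step that can fail.
fn extract(chunk: Vec<u32>) -> Result<u32, String> {
    Ok(chunk.iter().sum())
}

fn main() {
    let (sender, receiver) = mpsc::channel();
    let chunks = vec![vec![1, 2, 3], vec![4, 5], vec![6]];

    for chunk in chunks {
        let sender = sender.clone();
        rayon::spawn(move || {
            // Ignore send errors, like the `let _ = ...` in the extractor above.
            let _ = sender.send(extract(chunk));
        });
    }
    drop(sender); // the channel closes once every task has dropped its clone

    // The writer side drains results as they arrive, in completion order.
    for result in receiver {
        match result {
            Ok(sum) => println!("chunk extracted: {sum}"),
            Err(err) => eprintln!("extraction failed: {err}"),
        }
    }
}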

View File

@@ -315,6 +315,28 @@ where
         // get the primary key field id
         let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();

+        // get the fid of the `_geo.lat` and `_geo.lng` fields.
+        let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
+
+        // self.index.fields_ids_map($a)? ==>> field_id_map
+        let geo_fields_ids = match field_id_map.id("_geo") {
+            Some(gfid) => {
+                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
+                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
+                // if `_geo` is faceted then we get the `lat` and `lng`
+                if is_sortable || is_filterable {
+                    let field_ids = field_id_map
+                        .insert("_geo.lat")
+                        .zip(field_id_map.insert("_geo.lng"))
+                        .ok_or(UserError::AttributeLimitReached)?;
+                    Some(field_ids)
+                } else {
+                    None
+                }
+            }
+            None => None,
+        };
+
         let pool_params = GrenadParameters {
             chunk_compression_type: self.indexer_config.chunk_compression_type,
             chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -369,7 +391,6 @@ where
         // Run extraction pipeline in parallel.
         pool.install(|| {
-            let settings_diff_cloned = settings_diff.clone();
             rayon::spawn(move || {
                 let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
                 let _enter = child_span.enter();
@@ -399,7 +420,8 @@ where
                     pool_params,
                     lmdb_writer_sx.clone(),
                     primary_key_id,
-                    settings_diff_cloned,
+                    geo_fields_ids,
+                    settings_diff.clone(),
                     max_positions_per_attributes,
                 )
             });
@@ -426,7 +448,7 @@ where
                 Err(status) => {
                     if let Some(typed_chunks) = chunk_accumulator.pop_longest() {
                         let (docids, is_merged_database) =
-                            write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks)?;
+                            write_typed_chunk_into_index(typed_chunks, self.index, self.wtxn)?;
                         if !docids.is_empty() {
                             final_documents_ids |= docids;
                             let documents_seen_count = final_documents_ids.len();
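
The block reintroduced above only resolves the `_geo.lat` and `_geo.lng` field ids when the `_geo` field is filterable or sortable. Here is a self-contained sketch of that decision; the FieldsIdsMap below is a toy stand-in for the index structures, not the real milli type:

use std::collections::{HashMap, HashSet};

#[derive(Default)]
struct FieldsIdsMap {
    ids: HashMap<String, u16>,
    next: u16,
}

impl FieldsIdsMap {
    fn id(&self, name: &str) -> Option<u16> {
        self.ids.get(name).copied()
    }
    // Returns None when the toy field-id space is exhausted, mirroring the
    // `AttributeLimitReached` error path above.
    fn insert(&mut self, name: &str) -> Option<u16> {
        if let Some(id) = self.ids.get(name) {
            return Some(*id);
        }
        if self.next == u16::MAX {
            return None;
        }
        let id = self.next;
        self.next += 1;
        self.ids.insert(name.to_owned(), id);
        Some(id)
    }
}

fn geo_fields_ids(
    map: &mut FieldsIdsMap,
    filterable: &HashSet<u16>,
    sortable: &HashSet<u16>,
) -> Option<(u16, u16)> {
    let gfid = map.id("_geo")?;
    // Only allocate the nested fields when `_geo` is actually faceted.
    if filterable.contains(&gfid) || sortable.contains(&gfid) {
        map.insert("_geo.lat").zip(map.insert("_geo.lng"))
    } else {
        None
    }
}

fn main() {
    let mut map = FieldsIdsMap::default();
    let geo = map.insert("_geo").unwrap();
    let filterable = HashSet::from([geo]);
    let sortable = HashSet::new();
    println!("{:?}", geo_fields_ids(&mut map, &filterable, &sortable));
}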

View File

@@ -20,10 +20,7 @@ use super::{IndexDocumentsMethod, IndexerConfig};
 use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 use crate::index::{db_name, main_key};
-use crate::update::del_add::{
-    into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
-    KvReaderDelAdd,
-};
+use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
 use crate::update::index_documents::GrenadParameters;
 use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@@ -808,15 +805,13 @@ impl<'a, 'i> Transform<'a, 'i> {
         let mut new_inner_settings = old_inner_settings.clone();
         new_inner_settings.fields_ids_map = fields_ids_map;

-        let embedding_configs_updated = false;
-        let settings_update_only = false;
-        let settings_diff = InnerIndexSettingsDiff::new(
-            old_inner_settings,
-            new_inner_settings,
+        let settings_diff = InnerIndexSettingsDiff {
+            old: old_inner_settings,
+            new: new_inner_settings,
             primary_key_id,
-            embedding_configs_updated,
-            settings_update_only,
-        );
+            embedding_configs_updated: false,
+            settings_update_only: false,
+        };

         Ok(TransformOutput {
             primary_key,
@@ -845,6 +840,14 @@ impl<'a, 'i> Transform<'a, 'i> {
         // Always keep the primary key.
         let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };

+        // If only the `searchableAttributes` has been changed, keep only the searchable fields.
+        let must_reindex_searchables = settings_diff.reindex_searchable();
+        let necessary_searchable_field = |id: FieldId| -> bool {
+            must_reindex_searchables
+                && (settings_diff.old.searchable_fields_ids.contains(&id)
+                    || settings_diff.new.searchable_fields_ids.contains(&id))
+        };
+
         // If only a faceted field has been added, keep only this field.
         let must_reindex_facets = settings_diff.reindex_facets();
         let necessary_faceted_field = |id: FieldId| -> bool {
@@ -859,16 +862,13 @@
         // we need the fields for the prompt/templating.
         let reindex_vectors = settings_diff.reindex_vectors();

-        // The operations that we must perform on the different fields.
-        let mut operations = HashMap::new();
-
         let mut obkv_writer = KvWriter::<_, FieldId>::memory();
         for (id, val) in old_obkv.iter() {
-            if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
-                operations.insert(id, DelAddOperation::DeletionAndAddition);
-                obkv_writer.insert(id, val)?;
-            } else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
-                operations.insert(id, operation);
+            if is_primary_key(id)
+                || necessary_searchable_field(id)
+                || necessary_faceted_field(id)
+                || reindex_vectors
+            {
                 obkv_writer.insert(id, val)?;
             }
         }
@@ -887,9 +887,11 @@
             let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
             flattened_obkv_buffer.clear();
-            into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
-                operations.get(&id).copied().unwrap_or(DelAddOperation::DeletionAndAddition)
-            })?;
+            into_del_add_obkv(
+                flattened,
+                DelAddOperation::DeletionAndAddition,
+                flattened_obkv_buffer,
+            )?;
         }

         Ok(())
@@ -899,11 +901,6 @@
     /// of the index with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
     ///
     // TODO this can be done in parallel by using the rayon `ThreadPool`.
-    #[tracing::instrument(
-        level = "trace",
-        skip(self, wtxn, settings_diff),
-        target = "indexing::documents"
-    )]
     pub fn prepare_for_documents_reindexing(
         self,
         wtxn: &mut heed::RwTxn<'i>,
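
On the added side above, a field of the old document is kept for reindexing as soon as one of a few predicates matches. A compact sketch of that filtering loop, with hard-coded closures standing in for the settings-diff checks and plain tuples standing in for the obkv entries:

fn main() {
    // (field id, value) pairs of the old document; the ids and values are made up.
    let old_document: Vec<(u16, &str)> =
        vec![(0, "42"), (1, "kefir"), (2, "2024-05-29"), (3, "[0.1, 0.2]")];

    let is_primary_key = |id: u16| id == 0;
    let necessary_searchable_field = |id: u16| id == 1;
    let necessary_faceted_field = |id: u16| id == 2;
    let reindex_vectors = false;

    let kept: Vec<_> = old_document
        .into_iter()
        .filter(|(id, _)| {
            is_primary_key(*id)
                || necessary_searchable_field(*id)
                || necessary_faceted_field(*id)
                || reindex_vectors
        })
        .collect();

    // Only the primary key, the searchable field and the faceted field survive.
    println!("{kept:?}");
}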

View File

@@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::{Merger, MergerBuilder};
 use heed::types::Bytes;
-use heed::{BytesDecode, RwTxn};
+use heed::RwTxn;
 use obkv::{KvReader, KvWriter};
 use roaring::RoaringBitmap;
@@ -20,16 +20,13 @@ use super::MergeFn;
 use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
 use crate::facet::FacetType;
 use crate::index::db_name::DOCUMENTS;
-use crate::proximity::MAX_DISTANCE;
 use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
 use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::{
     as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
 };
-use crate::update::settings::InnerIndexSettingsDiff;
 use crate::{
-    lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
-    Result, SerializationError, U8StrStrCodec,
+    lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError,
 };

 /// This struct accumulates and group the TypedChunks
@@ -125,10 +122,9 @@ impl TypedChunk {
 /// Return new documents seen.
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 pub(crate) fn write_typed_chunk_into_index(
-    wtxn: &mut RwTxn,
-    index: &Index,
-    settings_diff: &InnerIndexSettingsDiff,
     typed_chunks: Vec<TypedChunk>,
+    index: &Index,
+    wtxn: &mut RwTxn,
 ) -> Result<(RoaringBitmap, bool)> {
     let mut is_merged_database = false;
     match typed_chunks[0] {
@@ -489,22 +485,13 @@ pub(crate) fn write_typed_chunk_into_index(
             }
             let merger = builder.build();

-            if settings_diff.only_additional_fields.is_some() {
-                write_proximity_entries_into_database_additional_searchables(
-                    merger,
-                    &index.word_pair_proximity_docids,
-                    wtxn,
-                )?;
-            } else {
-                write_entries_into_database(
-                    merger,
-                    &index.word_pair_proximity_docids,
-                    wtxn,
-                    deladd_serialize_add_side,
-                    merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
-                )?;
-            }
+            write_entries_into_database(
+                merger,
+                &index.word_pair_proximity_docids,
+                wtxn,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+            )?;

             is_merged_database = true;
         }
         TypedChunk::FieldIdDocidFacetNumbers(_) => {
@@ -843,51 +830,3 @@ where
     }
     Ok(())
 }
-
-/// Akin to the `write_entries_into_database` function but specialized
-/// for the case when we only index additional searchable fields only.
-#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
-fn write_proximity_entries_into_database_additional_searchables<R>(
-    merger: Merger<R, MergeFn>,
-    database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
-    wtxn: &mut RwTxn,
-) -> Result<()>
-where
-    R: io::Read + io::Seek,
-{
-    let mut iter = merger.into_stream_merger_iter()?;
-    while let Some((key, value)) = iter.next()? {
-        if valid_lmdb_key(key) {
-            let (proximity_to_insert, word1, word2) =
-                U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
-            let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
-                Some(value) => {
-                    CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
-                }
-                None => continue,
-            };
-
-            let mut data_to_remove = RoaringBitmap::new();
-            for prox in 1..(MAX_DISTANCE as u8) {
-                let key = (prox, word1, word2);
-                let database_value = database.get(wtxn, &key)?.unwrap_or_default();
-                let value = if prox == proximity_to_insert {
-                    // Proximity that should be changed.
-                    // Union values and remove lower proximity data
-                    (&database_value | &data_to_insert) - &data_to_remove
-                } else {
-                    // Remove lower proximity data
-                    &database_value - &data_to_remove
-                };
-
-                // add the current data in data_to_remove for the next proximities
-                data_to_remove |= &value;
-
-                if database_value != value {
-                    database.put(wtxn, &key, &value)?;
-                }
-            }
-        }
-    }
-    Ok(())
-}
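
The function removed above keeps, for each word pair, only the lowest proximity at which a document is known: new documents are unioned in at their proximity and stripped from every higher one. A standalone sketch of that bitmap bookkeeping over an in-memory array; it assumes the roaring crate, and the MAX_DISTANCE constant and array-backed "database" are simplifications of the real LMDB reads and writes:

use roaring::RoaringBitmap;

const MAX_DISTANCE: u8 = 8; // assumed value for the sketch

fn merge_additional_proximity(
    database: &mut [RoaringBitmap; MAX_DISTANCE as usize],
    proximity_to_insert: u8,
    to_insert: &RoaringBitmap,
) {
    let mut data_to_remove = RoaringBitmap::new();
    for prox in 1..MAX_DISTANCE {
        let entry = &mut database[prox as usize];
        let value = if prox == proximity_to_insert {
            // Union the new documents, minus anything already kept at a lower proximity.
            (&*entry | to_insert) - &data_to_remove
        } else {
            // Strip documents that are now covered by a lower proximity.
            &*entry - &data_to_remove
        };
        // Whatever is kept here must not reappear at a higher proximity.
        data_to_remove |= &value;
        *entry = value;
    }
}

fn main() {
    let mut database: [RoaringBitmap; MAX_DISTANCE as usize] =
        std::array::from_fn(|_| RoaringBitmap::new());
    database[3].insert(7); // document 7 was previously known at proximity 3
    let mut new = RoaringBitmap::new();
    new.insert(7); // ...and is now also seen at proximity 2
    merge_additional_proximity(&mut database, 2, &new);
    assert!(database[2].contains(7) && !database[3].contains(7));
}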

View File

@@ -9,7 +9,6 @@ use itertools::{EitherOrBoth, Itertools};
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use time::OffsetDateTime;

-use super::del_add::DelAddOperation;
 use super::index_documents::{IndexDocumentsConfig, Transform};
 use super::IndexerConfig;
 use crate::criterion::Criterion;
@@ -1073,14 +1072,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             .index
             .primary_key(self.wtxn)?
             .and_then(|name| new_inner_settings.fields_ids_map.id(name));

-        let settings_update_only = true;
-        let inner_settings_diff = InnerIndexSettingsDiff::new(
-            old_inner_settings,
-            new_inner_settings,
+        let inner_settings_diff = InnerIndexSettingsDiff {
+            old: old_inner_settings,
+            new: new_inner_settings,
             primary_key_id,
             embedding_configs_updated,
-            settings_update_only,
-        );
+            settings_update_only: true,
+        };

         if inner_settings_diff.any_reindexing_needed() {
             self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
@@ -1097,104 +1095,21 @@ pub struct InnerIndexSettingsDiff {
     // TODO: compare directly the embedders.
     pub(crate) embedding_configs_updated: bool,
     pub(crate) settings_update_only: bool,
-    /// The set of only the additional searchable fields.
-    /// If any other searchable field has been modified, is set to None.
-    pub(crate) only_additional_fields: Option<HashSet<String>>,
-
-    // Cache the check to see if all the stop_words, allowed_separators, dictionary,
-    // exact_attributes, proximity_precision are different.
-    pub(crate) cache_reindex_searchable_without_user_defined: bool,
-    // Cache the check to see if the user_defined_searchables are different.
-    pub(crate) cache_user_defined_searchables: bool,
-    // Cache the check to see if the exact_attributes are different.
-    pub(crate) cache_exact_attributes: bool,
 }

 impl InnerIndexSettingsDiff {
-    #[tracing::instrument(level = "trace", skip_all, target = "indexing::settings")]
-    pub(crate) fn new(
-        old_settings: InnerIndexSettings,
-        new_settings: InnerIndexSettings,
-        primary_key_id: Option<FieldId>,
-        embedding_configs_updated: bool,
-        settings_update_only: bool,
-    ) -> Self {
-        let only_additional_fields = match (
-            &old_settings.user_defined_searchable_fields,
-            &new_settings.user_defined_searchable_fields,
-        ) {
-            (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
-            (Some(old), Some(new)) => {
-                let old: HashSet<_> = old.iter().cloned().collect();
-                let new: HashSet<_> = new.iter().cloned().collect();
-                if old.difference(&new).next().is_none() {
-                    // if no field has been removed return only the additional ones
-                    Some(&new - &old).filter(|x| !x.is_empty())
-                } else {
-                    None
-                }
-            }
-        };
-
-        let cache_reindex_searchable_without_user_defined = {
-            old_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
-                != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
-                || old_settings.allowed_separators != new_settings.allowed_separators
-                || old_settings.dictionary != new_settings.dictionary
-                || old_settings.proximity_precision != new_settings.proximity_precision
-        };
-
-        let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
-
-        let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
-            != new_settings.user_defined_searchable_fields;
-
-        InnerIndexSettingsDiff {
-            old: old_settings,
-            new: new_settings,
-            primary_key_id,
-            embedding_configs_updated,
-            settings_update_only,
-            only_additional_fields,
-            cache_reindex_searchable_without_user_defined,
-            cache_user_defined_searchables,
-            cache_exact_attributes,
-        }
-    }
-
     pub fn any_reindexing_needed(&self) -> bool {
         self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
     }

     pub fn reindex_searchable(&self) -> bool {
-        self.cache_reindex_searchable_without_user_defined
-            || self.cache_exact_attributes
-            || self.cache_user_defined_searchables
-    }
-
-    pub fn reindex_proximities(&self) -> bool {
-        // if any searchable settings force the reindexing
-        (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables)
-            // and if any settings needs the proximity database created
-            && (self.old.proximity_precision == ProximityPrecision::ByAttribute
-                || self.new.proximity_precision == ProximityPrecision::ByAttribute)
-    }
-
-    pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> {
-        if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes {
-            Some(DelAddOperation::DeletionAndAddition)
-        } else if let Some(only_additional_fields) = &self.only_additional_fields {
-            let additional_field = self.new.fields_ids_map.name(id).unwrap();
-            if only_additional_fields.contains(additional_field) {
-                Some(DelAddOperation::Addition)
-            } else {
-                None
-            }
-        } else if self.cache_user_defined_searchables {
-            Some(DelAddOperation::DeletionAndAddition)
-        } else {
-            None
-        }
+        self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
+            != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
+            || self.old.allowed_separators != self.new.allowed_separators
+            || self.old.dictionary != self.new.dictionary
+            || self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
+            || self.old.exact_attributes != self.new.exact_attributes
+            || self.old.proximity_precision != self.new.proximity_precision
     }

     pub fn reindex_facets(&self) -> bool {
@@ -1227,11 +1142,6 @@ impl InnerIndexSettingsDiff {
         self.settings_update_only
     }

-    pub fn run_geo_indexing(&self) -> bool {
-        self.old.geo_fields_ids != self.new.geo_fields_ids
-            || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
-    }
-
     pub fn modified_faceted_fields(&self) -> HashSet<String> {
         &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
     }
@@ -1251,7 +1161,6 @@ pub(crate) struct InnerIndexSettings {
     pub proximity_precision: ProximityPrecision,
     pub embedding_configs: EmbeddingConfigs,
     pub existing_fields: HashSet<String>,
-    pub geo_fields_ids: Option<(FieldId, FieldId)>,
 }

 impl InnerIndexSettings {
@@ -1260,7 +1169,7 @@ impl InnerIndexSettings {
         let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
         let allowed_separators = index.allowed_separators(rtxn)?;
         let dictionary = index.dictionary(rtxn)?;
-        let mut fields_ids_map = index.fields_ids_map(rtxn)?;
+        let fields_ids_map = index.fields_ids_map(rtxn)?;
         let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
         let user_defined_searchable_fields =
             user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1275,24 +1184,6 @@ impl InnerIndexSettings {
             .into_iter()
             .filter_map(|(field, count)| (count != 0).then_some(field))
             .collect();
-        // index.fields_ids_map($a)? ==>> fields_ids_map
-        let geo_fields_ids = match fields_ids_map.id("_geo") {
-            Some(gfid) => {
-                let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
-                let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
-                // if `_geo` is faceted then we get the `lat` and `lng`
-                if is_sortable || is_filterable {
-                    let field_ids = fields_ids_map
-                        .insert("_geo.lat")
-                        .zip(fields_ids_map.insert("_geo.lng"))
-                        .ok_or(UserError::AttributeLimitReached)?;
-                    Some(field_ids)
-                } else {
-                    None
-                }
-            }
-            None => None,
-        };

         Ok(Self {
             stop_words,
@@ -1307,7 +1198,6 @@
             proximity_precision,
             embedding_configs,
             existing_fields,
-            geo_fields_ids,
         })
     }
@@ -1665,7 +1555,7 @@ mod tests {
         // When we search for something that is not in
         // the searchable fields it must not return any document.
         let result = index.search(&rtxn).query("23").execute().unwrap();
-        assert_eq!(result.documents_ids, Vec::<u32>::new());
+        assert!(result.documents_ids.is_empty());

         // When we search for something that is in the searchable fields
         // we must find the appropriate document.
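
The restored `reindex_searchable` above decides whether documents must be re-extracted by comparing every searchable-related setting of the old and new configurations. A trimmed-down sketch of that comparison over a plain settings struct; the field names follow the diff but their types are simplified stand-ins:

#[derive(Clone, Debug)]
struct SearchableSettings {
    stop_words: Vec<String>,
    allowed_separators: Option<Vec<String>>,
    dictionary: Option<Vec<String>>,
    user_defined_searchable_fields: Option<Vec<String>>,
    exact_attributes: Vec<String>,
    by_word_proximity: bool,
}

// Any difference in a searchable-related field forces a reindex of the searchable databases.
fn reindex_searchable(old: &SearchableSettings, new: &SearchableSettings) -> bool {
    old.stop_words != new.stop_words
        || old.allowed_separators != new.allowed_separators
        || old.dictionary != new.dictionary
        || old.user_defined_searchable_fields != new.user_defined_searchable_fields
        || old.exact_attributes != new.exact_attributes
        || old.by_word_proximity != new.by_word_proximity
}

fn main() {
    let old = SearchableSettings {
        stop_words: vec!["the".into()],
        allowed_separators: None,
        dictionary: None,
        user_defined_searchable_fields: Some(vec!["title".into()]),
        exact_attributes: vec![],
        by_word_proximity: true,
    };
    let mut new = old.clone();
    new.user_defined_searchable_fields = Some(vec!["title".into(), "overview".into()]);
    assert!(reindex_searchable(&old, &new));
}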

View File

@@ -159,7 +159,6 @@ pub fn expected_order(
     match optional_words {
         TermsMatchingStrategy::Last => groups.into_iter().flatten().collect(),
-        TermsMatchingStrategy::Frequency => groups.into_iter().flatten().collect(),
         TermsMatchingStrategy::All => {
             groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
         }