Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-18 04:11:07 +00:00)

Compare commits: index-stat...prototype- (210 commits)
SHA1
9d5e3457e5
04694071fe
b0c1a9504a
d57026cd96
41c9e8856a
d4ff59fcf5
9c485f8563
d8d12d5979
0597a97c84
2dfbb6813a
8f589a5cce
0b8bbd8750
eef95de30e
13a13a4862
e691c92ed5
928ab2f9b1
7c18a9375f
05a311f9be
9b1b9b409e
7f555f23e8
a0bfc9f63a
3155264381
42400c381e
08c7dab528
8590687515
8f5d127b1e
2b4160ebb9
8ba1c8f88f
8e7edf8ea7
9daccdf7f0
437ee55c57
b1717865ea
176f716292
a0df4becf4
e0a2f88fb0
e871906370
7a80c0dfb3
a9f691f279
1d40452057
324d448236
40ad19ba9e
cab4c4d7c9
4ec08e9430
661d1f90dc
6ec7541026
e8dee3ca65
a82c49ab08
84845de9ef
c9b3f80947
09c5edf242
4e85f91aee
7c157fc442
0b97596c93
a0e0fce677
3c295c1ffc
b951830461
9a13b72f25
1d8dfafd25
eed9176e0c
b132e859f7
9917bf046a
d9fea0143f
a385642ec3
34b2e98fe9
80bbd4b6f3
f42bef2f66
bd3c026406
84f8938f33
34a07110de
73bb080a26
44b5b9e1a7
68356869c0
605c1dd54a
3e3f73ba1e
efbe7ce78b
82e1f59f1e
362e9ff845
32f2556d22
63fd10aaa5
29b40295b8
26f0fa678d
60ddd53439
2bcd8d2983
09079a4e88
904f6574bf
6fb8af423c
cb0bb399fa
41760a9306
e9a3029c30
ed0ff47551
e1b8fb48ee
87e22e436a
0252cfe8b6
f35ad96afa
2ceb781c73
7bd67543dd
8e86eb91bb
55c17aa38b
aadbe88048
f36de2115f
702041b7e1
a05074e675
93f30e65a9
893592c5e9
e81809aae7
ce7e7f12c8
addb21f110
c34de05106
15a4c05379
9deeec88e0
167ac55a2d
ea68ccd034
d4f10800f2
dc293911ad
9d68e6969e
b4b686d253
98ec476198
c47b8a8bfe
054f81a021
d8ea688481
e69be93e42
b2b413db12
30741d17fa
ebad1f396f
29d8268c94
63bfe1cee2
f3e4d70638
eecf20f109
816d7ed174
864ad2a23c
66fb5c150c
7c2f5f77b8
66b8cfd8c8
ff3664431f
531748c536
7aa1275337
737aec1705
3e3c743392
5c5a4e075d
ab9f2269aa
321ec5f3fa
1b2923f7c0
717d4fddd4
a7e0f0de89
3b560ef7d0
2cf747cb89
3c31e1cdd1
23eaaf1001
c2a402f3ae
436a10bef4
8debf6fe81
c79e82c62a
aca305bb77
5816008139
268a9ef416
642b0f3a1b
cad90e8cbc
4571e512d2
7ac2f1489d
34349faeae
ed0a5be4b6
f105df6599
13e9b4c2e5
5a83cecb0f
cca6e47ec1
6196a53668
bb6448dc2e
eef9293630
dac77dfd14
072d81843f
29ec02d4d4
9d2a12821d
63ca25290b
59f64a5256
dc391deca0
114f878205
42709ea9a5
993b0d012c
fb8fa07169
0ccf1e2e40
9680e1e41f
a61ca4066e
461b5118bd
a3716c5678
2d34005965
62eefcda6e
85a24775c5
6b0e9b9a7f
b18c57ea7f
11d32ad192
d26e9a96ec
49c8bc4de6
da833eb095
701d44bd91
c621a250a7
8939e85f60
fa41d2489e
59c5b992c2
2ea8194c18
421df64602
c0fca6f884
9015a8e8d9
f050634b1e
becf1f066a
701d299369
a20e4d447c
af57c3c577
0c40ef6911
64b11f45d7
e68d86d6b6
24 .github/workflows/test-suite.yml (vendored)

@@ -30,20 +30,20 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
- name: Setup test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
- name: Setup test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -117,17 +117,17 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is present
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
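# Note on the `grep` flags above: `-z` makes grep treat the whole `cargo tree`
# output as a single NUL-delimited record, so `-vqz` exits successfully only
# when `lindera` appears nowhere in that output, while `-qz` exits
# successfully only when it appears at least once.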

# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
@@ -146,7 +146,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +165,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +184,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
359 Cargo.lock (generated)

@@ -152,7 +152,7 @@ dependencies = [
"pin-project-lite",
"tokio-rustls 0.23.4",
"tokio-util",
"webpki-roots",
"webpki-roots 0.22.6",
]

[[package]]
@@ -480,7 +480,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

[[package]]
name = "benchmarks"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"anyhow",
"bytes",
@@ -705,24 +705,27 @@ dependencies = [

[[package]]
name = "charabia"
version = "0.7.2"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413155d93157bff9130895c3bd83970ac7f35659ca57226a96aa35cf1e8e102c"
checksum = "bb49850f555eb71aa6fc6d4d79420e81f4d89fa56e0e9c0f6d19aace2f56c554"
dependencies = [
"aho-corasick",
"cow-utils",
"csv",
"deunicode",
"either",
"finl_unicode",
"fst",
"irg-kvariants",
"jieba-rs",
"lindera",
"lindera-core",
"lindera-dictionary",
"lindera-tokenizer",
"once_cell",
"pinyin",
"serde",
"slice-group-by",
"unicode-normalization",
"unicode-segmentation",
"wana_kana",
"whatlang",
]
@@ -1221,9 +1224,15 @@ dependencies = [
"winapi",
]

[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

[[package]]
name = "dump"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"anyhow",
"big_s",
@@ -1431,7 +1440,7 @@ dependencies = [

[[package]]
name = "file-store"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"faux",
"tempfile",
@@ -1453,7 +1462,7 @@ dependencies = [

[[package]]
name = "filter-parser"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"insta",
"nom",
@@ -1478,7 +1487,7 @@ dependencies = [

[[package]]
name = "flatten-serde-json"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"criterion",
"serde_json",
@@ -1596,7 +1605,7 @@ dependencies = [

[[package]]
name = "fuzzers"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"arbitrary",
"clap 4.3.0",
@@ -1725,6 +1734,15 @@ dependencies = [
"byteorder",
]

[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash 0.7.6",
]

[[package]]
name = "hashbrown"
version = "0.12.3"
@@ -1826,6 +1844,22 @@ dependencies = [
"digest",
]

[[package]]
name = "hnsw"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b9740ebf8769ec4ad6762cc951ba18f39bba6dfbc2fbbe46285f7539af79752"
dependencies = [
"ahash 0.7.6",
"hashbrown 0.11.2",
"libm",
"num-traits",
"rand_core",
"serde",
"smallvec",
"space",
]

[[package]]
name = "http"
version = "0.2.9"
@@ -1921,7 +1955,7 @@ dependencies = [

[[package]]
name = "index-scheduler"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"anyhow",
"big_s",
@@ -1939,6 +1973,7 @@ dependencies = [
"meilisearch-types",
"nelson",
"page_size 0.5.0",
"puffin",
"roaring",
"serde",
"serde_json",
@@ -1956,7 +1991,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
"hashbrown 0.12.3",
"serde",
]

@@ -2057,7 +2092,7 @@ checksum = "37228e06c75842d1097432d94d02f37fe3ebfca9791c2e8fef6e9db17ed128c1"
dependencies = [
"cedarwood",
"fxhash",
"hashbrown",
"hashbrown 0.12.3",
"lazy_static",
"phf",
"phf_codegen",
@@ -2084,7 +2119,7 @@ dependencies = [

[[package]]
name = "json-depth-checker"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"criterion",
"serde_json",
@@ -2104,15 +2139,6 @@ dependencies = [
"simple_asn1",
]

[[package]]
name = "kanaria"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
dependencies = [
"bitflags",
]

[[package]]
name = "language-tags"
version = "0.3.2"
@@ -2180,38 +2206,11 @@ dependencies = [
"vcpkg",
]

[[package]]
name = "lindera"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72be283281bec2768687b1784be03a678609b51f2f90f6f9d9b4f07953e6dd25"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding",
"kanaria",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-dictionary",
"lindera-filter",
"lindera-ipadic-builder",
"lindera-ko-dic-builder",
"lindera-unidic-builder",
"regex",
"serde",
"serde_json",
"thiserror",
"unicode-blocks",
"unicode-normalization",
"yada",
]

[[package]]
name = "lindera-cc-cedict-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10fbafd37adab44ccc2668a40fba2dbc4e665cb3c36018c15dfe2e2b830e28ce"
checksum = "4c6bf79b29a90bcd22036e494d6cc9ac3abe9ab604b21f3258ba6dc1ce501801"
dependencies = [
"anyhow",
"bincode",
@@ -2228,9 +2227,9 @@ dependencies = [

[[package]]
name = "lindera-compress"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9196bf5995503f6878a090dfee6114ba86430c72f67ef3624246b564869937"
checksum = "8f2e99e67736352bbb6ed1c273643975822505067ca32194b0981040bc50527a"
dependencies = [
"anyhow",
"flate2",
@@ -2239,9 +2238,9 @@ dependencies = [

[[package]]
name = "lindera-core"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5f0baa9932f682e9c5b388897330f155d3c40de80016e60125897fde5e0e246"
checksum = "7c3935e966409156f22cb4b334b21b0dce84b7aa1cad62214b466489d249c8e5"
dependencies = [
"anyhow",
"bincode",
@@ -2256,9 +2255,9 @@ dependencies = [

[[package]]
name = "lindera-decompress"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6e63fa6ef0bc3ce2c26d372aa6185b7a316194494a84f81678f5da2893bf4a2"
checksum = "7476406abb63c49d7f59c88b9b868ee8d2981495ea7e2c3ad129902f9916b3c6"
dependencies = [
"anyhow",
"flate2",
@@ -2267,63 +2266,50 @@ dependencies = [

[[package]]
name = "lindera-dictionary"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd765c36166016de87a1f447ea971573e4c63e334836c46ad0020f0408c88bfc"
checksum = "808b7d2b3cabc25a4022526d484a4cfd1d5924dc76a26e0379707698841acef2"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-ipadic",
"lindera-ko-dic",
"serde",
]

[[package]]
name = "lindera-filter"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5345e37fb9521ab3cee19283bed135d46b3521dc1fd13a49fa0992379056203"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"kanaria",
"lindera-core",
"lindera-dictionary",
"once_cell",
"regex",
"serde",
"serde_json",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
"yada",
]

[[package]]
name = "lindera-ipadic"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60eeb356295f784e7db4cfd2c6772f2bd059e565a7744e246642a07bc333a88a"
dependencies = [
"bincode",
"byteorder",
"encoding",
"flate2",
"lindera-core",
"lindera-decompress",
"lindera-ipadic-builder",
"once_cell",
"tar",
"lindera-ipadic-neologd-builder",
"lindera-ko-dic",
"lindera-ko-dic-builder",
"lindera-unidic",
"lindera-unidic-builder",
"serde",
]

[[package]]
name = "lindera-ipadic-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a16a2a88db9d956f5086bc976deb9951ca2dbbfef41a002df0a7bfb2c845aab"
checksum = "31f373a280958c930e5ee4a1e4db3a0ee0542afaf02d3b5cacb8cab4e298648e"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
]

[[package]]
name = "lindera-ipadic-neologd-builder"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92eff98e9ed1a7a412b91709c2343457a04ef02fa0c27c27e3a5892f5591eae9"
dependencies = [
"anyhow",
"bincode",
@@ -2333,7 +2319,6 @@ dependencies = [
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@@ -2343,9 +2328,9 @@ dependencies = [

[[package]]
name = "lindera-ko-dic"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb479b170a841b8cfbe602d772e30849ffe0562b219190a378368968b8c8f66"
checksum = "74c6d5bf7d8092bd6d10de7a5d74b70ea7cf234586235b0d6cdb903b05a6c9e2"
dependencies = [
"bincode",
"byteorder",
@@ -2360,9 +2345,9 @@ dependencies = [

[[package]]
name = "lindera-ko-dic-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b9b58213552560717c48e7833444a20d2d7fe26a6e565f7ce0cbbf85784c7cf"
checksum = "f0a4add6d3c1e41ec9e2690d33e287d0223fb59a30ccee4980c23f31368cae1e"
dependencies = [
"anyhow",
"bincode",
@@ -2379,10 +2364,42 @@ dependencies = [
]

[[package]]
name = "lindera-unidic-builder"
version = "0.23.0"
name = "lindera-tokenizer"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6858147cdaf4a7b564c08a247449d3aca38e9b4812499651af08afbf85324596"
checksum = "cb6a8acbd068019d1cdac7316f0dcb87f8e33ede2b13aa237f45114f9750afb8"
dependencies = [
"bincode",
"byteorder",
"lindera-core",
"lindera-dictionary",
"once_cell",
"serde",
"serde_json",
]

[[package]]
name = "lindera-unidic"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14abf0613d350b30d3b0406a33b1de8fa8d829f26516909421702174785991c8"
dependencies = [
"bincode",
"byteorder",
"encoding",
"lindera-core",
"lindera-decompress",
"lindera-unidic-builder",
"once_cell",
"ureq",
"zip",
]

[[package]]
name = "lindera-unidic-builder"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e204ed53d9bd63227d1e6a6c1f122ca039e00a8634ac32e7fb0281eeec8615c4"
dependencies = [
"anyhow",
"bincode",
@@ -2391,6 +2408,7 @@ dependencies = [
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@@ -2481,6 +2499,12 @@ dependencies = [
"syn 1.0.109",
]

[[package]]
name = "lz4_flex"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"

[[package]]
name = "manifest-dir-macros"
version = "0.1.17"
@@ -2507,7 +2531,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

[[package]]
name = "meili-snap"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"insta",
"md5",
@@ -2516,7 +2540,7 @@ dependencies = [

[[package]]
name = "meilisearch"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -2564,11 +2588,14 @@ dependencies = [
"num_cpus",
"obkv",
"once_cell",
"ordered-float",
"parking_lot",
"permissive-json-pointer",
"pin-project-lite",
"platform-dirs",
"prometheus",
"puffin",
"puffin_http",
"rand",
"rayon",
"regex",
@@ -2604,7 +2631,7 @@ dependencies = [

[[package]]
name = "meilisearch-auth"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"base64 0.21.2",
"enum-iterator",
@@ -2623,7 +2650,7 @@ dependencies = [

[[package]]
name = "meilisearch-types"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"actix-web",
"anyhow",
@@ -2677,12 +2704,13 @@ dependencies = [

[[package]]
name = "milli"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"big_s",
"bimap",
"bincode",
"bstr",
"bytemuck",
"byteorder",
"charabia",
"concat-arrays",
@@ -2697,6 +2725,8 @@ dependencies = [
"geoutils",
"grenad",
"heed",
"hnsw",
"indexmap",
"insta",
"itertools",
"json-depth-checker",
@@ -2710,7 +2740,9 @@ dependencies = [
"obkv",
"once_cell",
"ordered-float",
"puffin",
"rand",
"rand_pcg",
"rayon",
"roaring",
"rstar",
@@ -2720,6 +2752,7 @@ dependencies = [
"smallstr",
"smallvec",
"smartstring",
"space",
"tempfile",
"thiserror",
"time",
@@ -3009,7 +3042,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"

[[package]]
name = "permissive-json-pointer"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"big_s",
"serde_json",
@@ -3190,9 +3223,9 @@ dependencies = [

[[package]]
name = "proc-macro2"
version = "1.0.59"
version = "1.0.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da"
dependencies = [
"unicode-ident",
]
@@ -3233,6 +3266,35 @@ version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"

[[package]]
name = "puffin"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"cfg-if",
"instant",
"lz4_flex",
"once_cell",
"parking_lot",
"serde",
]

[[package]]
name = "puffin_http"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
dependencies = [
"anyhow",
"crossbeam-channel",
"log",
"puffin",
]

[[package]]
name = "quote"
version = "1.0.28"
@@ -3272,6 +3334,16 @@ dependencies = [
"getrandom",
]

[[package]]
name = "rand_pcg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
dependencies = [
"rand_core",
"serde",
]

[[package]]
name = "rayon"
version = "1.7.0"
@@ -3381,7 +3453,7 @@ dependencies = [
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots",
"webpki-roots 0.22.6",
"winreg",
]

@@ -3731,6 +3803,9 @@ name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
dependencies = [
"serde",
]

[[package]]
name = "smartstring"
@@ -3753,6 +3828,16 @@ dependencies = [
"winapi",
]

[[package]]
name = "space"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5ab9701ae895386d13db622abf411989deff7109b13b46b6173bb4ce5c1d123"
dependencies = [
"doc-comment",
"num-traits",
]

[[package]]
name = "spin"
version = "0.5.2"
@@ -4151,12 +4236,6 @@ version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"

[[package]]
name = "unicode-blocks"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6"

[[package]]
name = "unicode-ident"
version = "1.0.9"
@@ -4190,6 +4269,21 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"

[[package]]
name = "ureq"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
dependencies = [
"base64 0.21.2",
"log",
"once_cell",
"rustls 0.21.1",
"rustls-webpki",
"url",
"webpki-roots 0.23.1",
]

[[package]]
name = "url"
version = "2.3.1"
@@ -4398,13 +4492,22 @@ dependencies = [
"webpki",
]

[[package]]
name = "webpki-roots"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki",
]

[[package]]
name = "whatlang"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c531a2dc4c462b833788be2c07eef4e621d0e9edbd55bf280cc164c1c1aa043"
dependencies = [
"hashbrown",
"hashbrown 0.12.3",
"once_cell",
]
@@ -18,7 +18,7 @@ members = [
]

[workspace.package]
version = "1.2.0"
version = "1.3.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
19 PROFILING.md (new file)

@@ -0,0 +1,19 @@
# Profiling Meilisearch

Search engines are complex pieces of software, and profiling them requires thorough tooling. We chose [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import profiling reports using the options in the top bar's _File_ menu.



## Profiling the Indexing Process

When you enable Meilisearch's `profile-with-puffin` feature, a Puffin HTTP server runs inside Meilisearch and listens on the default _0.0.0.0:8585_ address. The server records a "frame" every time the `IndexScheduler::tick` method executes.
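For orientation, the wiring behind such a feature amounts to only a few lines. Here is a minimal, self-contained sketch — not Meilisearch's actual code — using the `puffin` 0.16 and `puffin_http` 0.13 crates from the lockfile above, with a stand-in `tick()` function:

```rust
use std::{thread, time::Duration};

fn tick() {
    // Records a scope named after the enclosing function,
    // closed automatically when the function returns.
    puffin::profile_function!();
    // ... the work to profile goes here ...
}

fn main() {
    // The HTTP server that `puffin_viewer` connects to.
    let _server = puffin_http::Server::new("0.0.0.0:8585").unwrap();
    // Scopes are discarded unless profiling is switched on.
    puffin::set_scopes_on(true);

    loop {
        tick();
        // Everything recorded since the previous call becomes one frame.
        puffin::GlobalProfiler::lock().new_frame();
        thread::sleep(Duration::from_millis(100));
    }
}
```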

Once Meilisearch is running and awaiting new indexing operations, [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. We advise running the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable so you can watch the client attempting to connect to your server.

One more tip on the Puffin viewer UI: be mindful of the _Merge children with same ID_ option. It can hide the exact timings at which events were sent, so turn it off if you see strange gaps in the flamegraph.

## Profiling the Search Process

We still need to take the time to profile the search side of the engine with Puffin: the filtering phase, query parsing, and query creation and execution. We could even profile the Actix HTTP server.

The only issue we foresee is the framing system: Puffin requires a global, frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.
69 README.md

@@ -1,15 +1,20 @@
<p align="center">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
</p>

<h4 align="center">
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://www.meilisearch.com/docs">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
</h4>

<p align="center">
@@ -23,72 +28,72 @@
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
</a>
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
</a>
</p>

🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥

## ✨ Features

- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**

## 📖 Documentation

You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).

## 🚀 Getting started

For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.

You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.

## ☁️ Meilisearch cloud
## ⚡ Supercharge your Meilisearch experience

Let us manage your infrastructure so you can focus on integrating a great search experience. Try [Meilisearch Cloud](https://meilisearch.com/pricing) today.
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch). Get started with a 14-day free trial! No credit card required.

## 🧰 SDKs & integration tools

Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!

Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).

[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)

## ⚙️ Advanced usage

Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.

We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/advanced/filtering), [sorting](https://www.meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).

Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).

## 📊 Telemetry

Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.

If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.

## 📫 Get in touch!

Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)

🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
@@ -98,7 +98,7 @@
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"targets": [
{
"datasource": {
@@ -158,7 +158,7 @@
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"targets": [
{
"datasource": {
@@ -176,8 +176,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
@@ -221,7 +220,7 @@
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"targets": [
{
"datasource": {
@@ -241,8 +240,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
@@ -282,7 +280,7 @@
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"targets": [
{
"datasource": {
@@ -302,8 +300,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
@@ -343,7 +340,7 @@
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"targets": [
{
"datasource": {
@@ -363,8 +360,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"description": "",
"fieldConfig": {
@@ -411,8 +407,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@@ -460,8 +455,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"editorMode": "builder",
"expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
@@ -559,7 +553,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(http_response_time_seconds_count[5m])",
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"interval": "",
"legendFormat": "{{method}} {{path}}",
"range": true,
@@ -571,8 +565,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "c4085c47-f6d3-45dd-b761-6809055bb749"
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
@@ -615,8 +608,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@@ -743,7 +735,7 @@
"unit": "s"
}
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.0.1",
"reverseYBuckets": false,
"targets": [
{
@@ -752,7 +744,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "sum by(le) (increase(http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
"format": "heatmap",
"interval": "",
"legendFormat": "{{le}}",
@@ -1306,8 +1298,7 @@
"value": "localhost:7700"
},
"datasource": {
"type": "prometheus",
"uid": "bb3298a4-9acf-4da1-b86a-813f29f50888"
"type": "prometheus"
},
"definition": "label_values(instance)",
"hide": 0,
@@ -1329,12 +1320,11 @@
{
"current": {
"selected": false,
"text": "mieli",
"value": "mieli"
"text": "index-word-count-10-count",
"value": "index-word-count-10-count"
},
"datasource": {
"type": "prometheus",
"uid": "bb3298a4-9acf-4da1-b86a-813f29f50888"
"type": "prometheus"
},
"definition": "label_values(index)",
"hide": 0,
@@ -1371,6 +1361,6 @@
"timezone": "",
"title": "Meilisearch",
"uid": "7wcZ94dnz",
"version": 6,
"version": 5,
"weekStart": ""
}
}
BIN assets/profiling-example.png (new file)
Binary file not shown. After: size 1.2 MiB.
@@ -208,12 +208,13 @@ pub(crate) mod test {
use std::str::FromStr;

use big_s::S;
use maplit::btreeset;
use maplit::{btreemap, btreeset};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
@@ -260,10 +261,18 @@ pub(crate) mod test {
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),
sort_facet_values_by: Setting::Set(
btreemap! { S("age") => FacetValuesSort::Count },
),
}),
pagination: Setting::NotSet,
_kind: std::marker::PhantomData,
};
@@ -412,6 +421,8 @@ pub(crate) mod test {
}
keys.flush().unwrap();

// ========== TODO: create features here

// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
@@ -191,6 +191,10 @@ impl CompatV5ToV6 {
})
})))
}

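// Note: v5 dumps predate the runtime-togglable experimental features, so a
// converted dump has none to report — hence the unconditional `Ok(None)` below.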
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
Ok(None)
}
}

pub enum CompatIndexV5ToV6 {
@@ -336,6 +340,9 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
}
},
stop_words: settings.stop_words.into(),
non_separator_tokens: v6::Setting::NotSet,
separator_tokens: v6::Setting::NotSet,
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: match settings.typo_tolerance {
@@ -358,6 +365,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
faceting: match settings.faceting {
v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings {
max_values_per_facet: faceting.max_values_per_facet.into(),
sort_facet_values_by: v6::Setting::NotSet,
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
@@ -107,6 +107,13 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.keys(),
}
}

pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
match self {
DumpReader::Current(current) => Ok(current.features()),
DumpReader::Compat(compat) => compat.features(),
}
}
}

impl From<V6Reader> for DumpReader {
@@ -189,6 +196,8 @@ pub(crate) mod test {

use super::*;

// TODO: add `features` to tests

#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
@@ -2,6 +2,7 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;

use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
@@ -18,6 +19,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;

pub type Task = crate::TaskDump;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;

// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@@ -47,6 +49,7 @@ pub struct V6Reader {
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
}

impl V6Reader {
@@ -58,11 +61,29 @@ impl V6Reader {
Err(e) => return Err(e.into()),
};

let feature_file = match fs::read(dump.path().join("experimental-features.json")) {
Ok(feature_file) => Some(feature_file),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
debug!("`experimental-features.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let features = if let Some(feature_file) = feature_file {
Some(serde_json::from_reader(&*feature_file)?)
} else {
None
};

Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
dump,
})
}
@@ -129,6 +150,10 @@ impl V6Reader {
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}

pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
}

pub struct UpdateFile {
@@ -4,6 +4,7 @@ use std::path::PathBuf;

use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};

@@ -53,6 +54,13 @@ impl DumpWriter {
        TaskWriter::new(self.dir.path().join("tasks"))
    }

    pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
        Ok(std::fs::write(
            self.dir.path().join("experimental-features.json"),
            serde_json::to_string(&features)?,
        )?)
    }

    pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
        let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
        let mut tar_encoder = tar::Builder::new(gz_encoder);
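For reference, a sketch of the round trip between `create_experimental_features` above and the `V6Reader` parsing earlier in this diff. The exact JSON follows from the serde attributes on `RuntimeTogglableFeatures` (camelCase names, `default` on missing fields); the `main` wrapper is only for illustration:

    use meilisearch_types::features::RuntimeTogglableFeatures;

    fn main() -> serde_json::Result<()> {
        // What a freshly written `experimental-features.json` contains for default features.
        let written = serde_json::to_string(&RuntimeTogglableFeatures::default())?;
        assert_eq!(written, r#"{"scoreDetails":false,"vectorStore":false}"#);

        // `#[serde(default)]` lets a dump that omits a field still parse.
        let read: RuntimeTogglableFeatures = serde_json::from_str(r#"{"scoreDetails":true}"#)?;
        assert!(read.score_details && !read.vector_store);
        Ok(())
    }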
@@ -22,6 +22,7 @@ log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = "0.16.0"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -471,6 +471,8 @@ impl IndexScheduler {
        #[cfg(test)]
        self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;

        puffin::profile_function!();

        let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
        let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;

@@ -575,6 +577,9 @@ impl IndexScheduler {
            self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
            self.breakpoint(crate::Breakpoint::InsideProcessBatch);
        }

        puffin::profile_function!(format!("{:?}", batch));

        match batch {
            Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
                // 1. Retrieve the tasks that matched the query at enqueue-time.

@@ -839,6 +844,10 @@ impl IndexScheduler {
                    Ok(())
                })?;

                // 4. Dump experimental feature settings
                let features = self.features()?.runtime_features();
                dump.create_experimental_features(features)?;

                let dump_uid = started_at.format(format_description!(
                    "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
                )).unwrap();

@@ -1107,6 +1116,8 @@ impl IndexScheduler {
        index: &'i Index,
        operation: IndexOperation,
    ) -> Result<Vec<Task>> {
        puffin::profile_function!();

        match operation {
            IndexOperation::DocumentClear { mut tasks, .. } => {
                let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
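The `puffin::profile_function!` calls above are effectively free until scopes are switched on, which `main.rs` does later in this diff behind the `profile-with-puffin` feature. A minimal sketch of that life cycle (the scope name is made up):

    fn main() {
        puffin::set_scopes_on(true); // without this, profile_function!/profile_scope! record nothing
        {
            puffin::profile_scope!("demo_scope"); // measured until the end of this block
        }
        // The scheduler's run loop does the same thing once per iteration to close a frame.
        puffin::GlobalProfiler::lock().new_frame();
    }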
@@ -123,6 +123,8 @@ pub enum Error {
    IoError(#[from] std::io::Error),
    #[error(transparent)]
    Persist(#[from] tempfile::PersistError),
    #[error(transparent)]
    FeatureNotEnabled(#[from] FeatureNotEnabledError),

    #[error(transparent)]
    Anyhow(#[from] anyhow::Error),

@@ -142,6 +144,16 @@ pub enum Error {
    PlannedFailure,
}

#[derive(Debug, thiserror::Error)]
#[error(
    "{disabled_action} requires enabling the `{feature}` experimental feature. See {issue_link}"
)]
pub struct FeatureNotEnabledError {
    pub disabled_action: &'static str,
    pub feature: &'static str,
    pub issue_link: &'static str,
}

impl Error {
    pub fn is_recoverable(&self) -> bool {
        match self {

@@ -170,6 +182,7 @@ impl Error {
            | Error::FileStore(_)
            | Error::IoError(_)
            | Error::Persist(_)
            | Error::FeatureNotEnabled(_)
            | Error::Anyhow(_) => true,
            Error::CreateBatch(_)
            | Error::CorruptedTaskQueue

@@ -214,6 +227,7 @@ impl ErrorCode for Error {
            Error::FileStore(e) => e.error_code(),
            Error::IoError(e) => e.error_code(),
            Error::Persist(e) => e.error_code(),
            Error::FeatureNotEnabled(_) => Code::FeatureNotEnabled,

            // Irrecoverable errors
            Error::Anyhow(_) => Code::Internal,
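Given the format string above, the message rendered for a disabled feature reads as follows (a sketch reusing the `score details` values from `features.rs` below):

    #[test]
    fn feature_not_enabled_message() {
        let err = FeatureNotEnabledError {
            disabled_action: "Computing score details",
            feature: "score details",
            issue_link: "https://github.com/meilisearch/product/discussions/674",
        };
        assert_eq!(
            err.to_string(),
            "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674"
        );
    }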
index-scheduler/src/features.rs (new file, 98 lines)
@@ -0,0 +1,98 @@
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};

use crate::error::FeatureNotEnabledError;
use crate::Result;

const EXPERIMENTAL_FEATURES: &str = "experimental-features";

#[derive(Clone)]
pub(crate) struct FeatureData {
    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
    instance: InstanceTogglableFeatures,
}

#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
    runtime: RuntimeTogglableFeatures,
    instance: InstanceTogglableFeatures,
}

impl RoFeatures {
    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
        let runtime = data.runtime_features(txn)?;
        Ok(Self { runtime, instance: data.instance })
    }

    pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
        self.runtime
    }

    pub fn check_score_details(&self) -> Result<()> {
        if self.runtime.score_details {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Computing score details",
                feature: "score details",
                issue_link: "https://github.com/meilisearch/product/discussions/674",
            }
            .into())
        }
    }

    pub fn check_metrics(&self) -> Result<()> {
        if self.instance.metrics {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Getting metrics",
                feature: "metrics",
                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
            }
            .into())
        }
    }

    pub fn check_vector(&self) -> Result<()> {
        if self.runtime.vector_store {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Passing `vector` as a query parameter",
                feature: "vector store",
                issue_link: "https://github.com/meilisearch/product/discussions/677",
            }
            .into())
        }
    }
}

impl FeatureData {
    pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
        let mut wtxn = env.write_txn()?;
        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
        wtxn.commit()?;

        Ok(Self { runtime: runtime_features, instance: instance_features })
    }

    pub fn put_runtime_features(
        &self,
        mut wtxn: RwTxn,
        features: RuntimeTogglableFeatures,
    ) -> Result<()> {
        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
        wtxn.commit()?;
        Ok(())
    }

    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
    }

    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
        RoFeatures::new(txn, self)
    }
}
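A minimal sketch of how callers are meant to use these checks; the function name and signature are illustrative, not one of the real routes:

    fn guard_vector_query(scheduler: &IndexScheduler, vector: Option<&[f32]>) -> Result<()> {
        if vector.is_some() {
            // Fails with Error::FeatureNotEnabled until `vectorStore` is toggled on.
            scheduler.features()?.check_vector()?;
        }
        Ok(())
    }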
@@ -223,7 +223,9 @@ impl IndexMap {
        enable_mdb_writemap: bool,
        map_size_growth: usize,
    ) {
-       let Some(index) = self.available.remove(uuid) else { return; };
+       let Some(index) = self.available.remove(uuid) else {
+           return;
+       };
        self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
    }
@@ -28,6 +28,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        started_at,
        finished_at,
        index_mapper,
        features: _,
        max_number_of_tasks: _,
        wake_up: _,
        dumps_path: _,
@@ -21,6 +21,7 @@ content of the scheduler or enqueue new tasks.
mod autobatcher;
mod batch;
pub mod error;
mod features;
mod index_mapper;
#[cfg(test)]
mod insta_snapshot;

@@ -41,8 +42,10 @@ use std::time::Duration;

use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
pub use features::RoFeatures;
use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;

@@ -247,6 +250,8 @@ pub struct IndexSchedulerOptions {
    /// The maximum number of tasks stored in the task queue before starting
    /// to auto schedule task deletions.
    pub max_number_of_tasks: usize,
    /// The experimental features enabled for this instance.
    pub instance_features: InstanceTogglableFeatures,
}

/// Structure which holds meilisearch's indexes and schedules the tasks

@@ -290,6 +295,9 @@ pub struct IndexScheduler {
    /// In charge of creating, opening, storing and returning indexes.
    pub(crate) index_mapper: IndexMapper,

    /// In charge of fetching and setting the status of experimental features.
    features: features::FeatureData,

    /// Get a signal when a batch needs to be processed.
    pub(crate) wake_up: Arc<SignalEvent>,

@@ -360,6 +368,7 @@ impl IndexScheduler {
            planned_failures: self.planned_failures.clone(),
            #[cfg(test)]
            run_loop_iteration: self.run_loop_iteration.clone(),
            features: self.features.clone(),
        }
    }
}

@@ -398,9 +407,12 @@ impl IndexScheduler {
        };

        let env = heed::EnvOpenOptions::new()
-           .max_dbs(10)
+           .max_dbs(11)
            .map_size(budget.task_db_size)
            .open(options.tasks_path)?;

+       let features = features::FeatureData::new(&env, options.instance_features)?;
+
        let file_store = FileStore::new(&options.update_file_path)?;

        let mut wtxn = env.write_txn()?;

@@ -452,6 +464,7 @@ impl IndexScheduler {
            planned_failures,
            #[cfg(test)]
            run_loop_iteration: Arc::new(RwLock::new(0)),
            features,
        };

        this.run();

@@ -1019,6 +1032,8 @@ impl IndexScheduler {
            self.breakpoint(Breakpoint::Start);
        }

        puffin::GlobalProfiler::lock().new_frame();

        self.cleanup_task_queue()?;

        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;

@@ -1214,6 +1229,17 @@ impl IndexScheduler {
        Ok(IndexStats { is_indexing, inner_stats: index_stats })
    }

    pub fn features(&self) -> Result<RoFeatures> {
        let rtxn = self.read_txn()?;
        self.features.features(rtxn)
    }

    pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
        let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
        self.features.put_runtime_features(wtxn, features)?;
        Ok(())
    }

    pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
        match task.content_uuid() {
            Some(content_file) => self.delete_update_file(content_file),

@@ -1534,6 +1560,7 @@ mod tests {
            indexer_config,
            autobatching_enabled: true,
            max_number_of_tasks: 1_000_000,
            instance_features: Default::default(),
        };
        configuration(&mut options);
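Putting the two new public methods together, toggling a feature at runtime is a read-modify-write against the scheduler's LMDB environment (a sketch; `scheduler` is an already-built `IndexScheduler`):

    fn enable_vector_store(scheduler: &IndexScheduler) -> Result<()> {
        let mut features = scheduler.features()?.runtime_features();
        features.vector_store = true;
        scheduler.put_runtime_features(features)?; // committed by the inner wtxn.commit()
        assert!(scheduler.features()?.runtime_features().vector_store);
        Ok(())
    }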
@@ -151,6 +151,10 @@ make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_
make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);
make_missing_field_convenience_builder!(
    MissingFacetSearchFacetName,
    missing_facet_search_facet_name
);

// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self`
@@ -217,6 +217,8 @@ InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;

@@ -224,12 +226,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;

@@ -239,7 +243,12 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;

@@ -250,6 +259,9 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ;
InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
InvalidState , Internal , INTERNAL_SERVER_ERROR ;

@@ -269,6 +281,7 @@ InvalidTaskStatuses , InvalidRequest , BAD_REQUEST ;
InvalidTaskTypes , InvalidRequest , BAD_REQUEST ;
InvalidTaskUids , InvalidRequest , BAD_REQUEST ;
IoError , System , UNPROCESSABLE_ENTITY;
FeatureNotEnabled , InvalidRequest , BAD_REQUEST ;
MalformedPayload , InvalidRequest , BAD_REQUEST ;
MaxFieldsLimitExceeded , InvalidRequest , BAD_REQUEST ;
MissingApiKeyActions , InvalidRequest , BAD_REQUEST ;

@@ -277,6 +290,7 @@ MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ;
MissingAuthorizationHeader , Auth , UNAUTHORIZED ;
MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
MissingDocumentId , InvalidRequest , BAD_REQUEST ;
MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ;

@@ -330,8 +344,16 @@ impl ErrorCode for milli::Error {
                UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
                UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
                UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
                UserError::InvalidSearchableAttribute { .. } => {
                    Code::InvalidSearchAttributesToSearchOn
                }
                UserError::InvalidFacetSearchFacetName { .. } => {
                    Code::InvalidFacetSearchFacetName
                }
                UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
                UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
                UserError::SortError(_) => Code::InvalidSearchSort,
                UserError::InvalidMinTypoWordLenSetting(_, _) => {
                    Code::InvalidSettingsTypoTolerance
meilisearch-types/src/facet_values_sort.rs (new file, 33 lines)
@@ -0,0 +1,33 @@
use deserr::Deserr;
use milli::OrderBy;
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr)]
#[serde(rename_all = "camelCase")]
#[deserr(rename_all = camelCase)]
pub enum FacetValuesSort {
    /// Facet values are sorted in alphabetical order, ascending from A to Z.
    #[default]
    Alpha,
    /// Facet values are sorted by decreasing count.
    /// The count is the number of records containing this facet value in the results of the query.
    Count,
}

impl From<FacetValuesSort> for OrderBy {
    fn from(val: FacetValuesSort) -> Self {
        match val {
            FacetValuesSort::Alpha => OrderBy::Lexicographic,
            FacetValuesSort::Count => OrderBy::Count,
        }
    }
}

impl From<OrderBy> for FacetValuesSort {
    fn from(val: OrderBy) -> Self {
        match val {
            OrderBy::Lexicographic => FacetValuesSort::Alpha,
            OrderBy::Count => FacetValuesSort::Count,
        }
    }
}
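With `rename_all = "camelCase"` the variants serialize as bare lowercase strings; a quick round-trip sketch:

    let sort: FacetValuesSort = serde_json::from_str("\"count\"").unwrap();
    assert_eq!(sort, FacetValuesSort::Count);
    assert_eq!(serde_json::to_string(&FacetValuesSort::Alpha).unwrap(), "\"alpha\"");
    let _order: milli::OrderBy = sort.into(); // OrderBy::Count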
meilisearch-types/src/features.rs (new file, 13 lines)
@@ -0,0 +1,13 @@
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default)]
#[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures {
    pub score_details: bool,
    pub vector_store: bool,
}

#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
    pub metrics: bool,
}
@@ -147,9 +147,7 @@ impl Key {
fn parse_expiration_date(
    string: Option<String>,
) -> std::result::Result<Option<OffsetDateTime>, ParseOffsetDateTimeError> {
-   let Some(string) = string else {
-       return Ok(None)
-   };
+   let Some(string) = string else { return Ok(None) };
    let datetime = if let Ok(datetime) = OffsetDateTime::parse(&string, &Rfc3339) {
        datetime
    } else if let Ok(primitive_datetime) = PrimitiveDateTime::parse(

@@ -274,6 +272,12 @@ pub enum Action {
    #[serde(rename = "keys.delete")]
    #[deserr(rename = "keys.delete")]
    KeysDelete,
    #[serde(rename = "experimental.get")]
    #[deserr(rename = "experimental.get")]
    ExperimentalFeaturesGet,
    #[serde(rename = "experimental.update")]
    #[deserr(rename = "experimental.update")]
    ExperimentalFeaturesUpdate,
}

impl Action {

@@ -310,6 +314,8 @@ impl Action {
            KEYS_GET => Some(Self::KeysGet),
            KEYS_UPDATE => Some(Self::KeysUpdate),
            KEYS_DELETE => Some(Self::KeysDelete),
            EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
            EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
            _otherwise => None,
        }
    }

@@ -352,4 +358,6 @@ pub mod actions {
    pub const KEYS_GET: u8 = KeysGet.repr();
    pub const KEYS_UPDATE: u8 = KeysUpdate.repr();
    pub const KEYS_DELETE: u8 = KeysDelete.repr();
    pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
    pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
}
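The serde renames above mean API key payloads spell these actions with dots. For example, assuming the usual serde derives on `Action` (they are not shown in this hunk):

    assert_eq!(serde_json::to_string(&Action::ExperimentalFeaturesGet).unwrap(), "\"experimental.get\"");
    assert_eq!(serde_json::to_string(&Action::ExperimentalFeaturesUpdate).unwrap(), "\"experimental.update\"");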
@@ -2,6 +2,8 @@ pub mod compression;
pub mod deserr;
pub mod document_formats;
pub mod error;
pub mod facet_values_sort;
pub mod features;
pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
@@ -14,8 +14,9 @@ use serde::{Deserialize, Serialize, Serializer};

use crate::deserr::DeserrJsonError;
use crate::error::deserr_codes::*;
use crate::facet_values_sort::FacetValuesSort;

-/// The maximimum number of results that the engine
+/// The maximum number of results that the engine
/// will be able to return in one search call.
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;

@@ -102,6 +103,9 @@ pub struct FacetingSettings {
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    pub max_values_per_facet: Setting<usize>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    pub sort_facet_values_by: Setting<BTreeMap<String, FacetValuesSort>>,
}

#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]

@@ -167,6 +171,15 @@ pub struct Settings<T> {
    #[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
    pub non_separator_tokens: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
    pub separator_tokens: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
    pub dictionary: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]

@@ -197,6 +210,9 @@ impl Settings<Checked> {
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            non_separator_tokens: Setting::Reset,
            separator_tokens: Setting::Reset,
            dictionary: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,

@@ -213,6 +229,9 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,

@@ -228,6 +247,9 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,

@@ -270,6 +292,9 @@ impl Settings<Unchecked> {
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            non_separator_tokens: self.non_separator_tokens,
            separator_tokens: self.separator_tokens,
            dictionary: self.dictionary,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,

@@ -331,6 +356,28 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }

    match settings.non_separator_tokens {
        Setting::Set(ref non_separator_tokens) => {
            builder.set_non_separator_tokens(non_separator_tokens.clone())
        }
        Setting::Reset => builder.reset_non_separator_tokens(),
        Setting::NotSet => (),
    }

    match settings.separator_tokens {
        Setting::Set(ref separator_tokens) => {
            builder.set_separator_tokens(separator_tokens.clone())
        }
        Setting::Reset => builder.reset_separator_tokens(),
        Setting::NotSet => (),
    }

    match settings.dictionary {
        Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
        Setting::Reset => builder.reset_dictionary(),
        Setting::NotSet => (),
    }

    match settings.synonyms {
        Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
        Setting::Reset => builder.reset_synonyms(),

@@ -398,13 +445,25 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }

-   match settings.faceting {
-       Setting::Set(ref value) => match value.max_values_per_facet {
-           Setting::Set(val) => builder.set_max_values_per_facet(val),
-           Setting::Reset => builder.reset_max_values_per_facet(),
-           Setting::NotSet => (),
-       },
-       Setting::Reset => builder.reset_max_values_per_facet(),
+   match &settings.faceting {
+       Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => {
+           match max_values_per_facet {
+               Setting::Set(val) => builder.set_max_values_per_facet(*val),
+               Setting::Reset => builder.reset_max_values_per_facet(),
+               Setting::NotSet => (),
+           }
+           match sort_facet_values_by {
+               Setting::Set(val) => builder.set_sort_facet_values_by(
+                   val.iter().map(|(name, order)| (name.clone(), (*order).into())).collect(),
+               ),
+               Setting::Reset => builder.reset_sort_facet_values_by(),
+               Setting::NotSet => (),
+           }
+       }
+       Setting::Reset => {
+           builder.reset_max_values_per_facet();
+           builder.reset_sort_facet_values_by();
+       }
        Setting::NotSet => (),
    }
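For illustration, the faceting settings payload shape this match now handles, using the API's camelCase field names ("genres" is a made-up facet):

    let faceting = serde_json::json!({
        "maxValuesPerFacet": 100,
        "sortFacetValuesBy": { "genres": "count" }
    });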
@@ -443,15 +502,14 @@ pub fn settings(
        })
        .transpose()?
        .unwrap_or_default();

    let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
    let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
    let dictionary = index.dictionary(rtxn)?.unwrap_or_default();

    let distinct_field = index.distinct_field(rtxn)?.map(String::from);

-   // in milli each word in the synonyms map were split on their separator. Since we lost
-   // this information we are going to put space between words.
-   let synonyms = index
-       .synonyms(rtxn)?
-       .iter()
-       .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
-       .collect();
+   let synonyms = index.user_defined_synonyms(rtxn)?;

    let min_typo_word_len = MinWordSizeTyposSetting {
        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),

@@ -476,6 +534,13 @@ pub fn settings(
        max_values_per_facet: Setting::Set(
            index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
        ),
        sort_facet_values_by: Setting::Set(
            index
                .sort_facet_values_by(rtxn)?
                .into_iter()
                .map(|(name, sort)| (name, sort.into()))
                .collect(),
        ),
    };

    let pagination = PaginationSettings {

@@ -497,6 +562,9 @@ pub fn settings(
        sortable_attributes: Setting::Set(sortable_attributes),
        ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
        stop_words: Setting::Set(stop_words),
        non_separator_tokens: Setting::Set(non_separator_tokens),
        separator_tokens: Setting::Set(separator_tokens),
        dictionary: Setting::Set(dictionary),
        distinct_attribute: match distinct_field {
            Some(field) => Setting::Set(field),
            None => Setting::Reset,

@@ -619,6 +687,9 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,

@@ -640,6 +711,9 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
@@ -14,14 +14,27 @@ default-run = "meilisearch"

[dependencies]
actix-cors = "0.6.4"
-actix-http = { version = "3.3.1", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
-actix-web = { version = "4.3.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
+actix-http = { version = "3.3.1", default-features = false, features = [
+    "compress-brotli",
+    "compress-gzip",
+    "rustls",
+] }
+actix-web = { version = "4.3.1", default-features = false, features = [
+    "macros",
+    "compress-brotli",
+    "compress-gzip",
+    "cookies",
+    "rustls",
+] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.68"
bstr = "1.4.0"
-byte-unit = { version = "4.0.19", default-features = false, features = ["std", "serde"] }
+byte-unit = { version = "4.0.19", default-features = false, features = [
+    "std",
+    "serde",
+] }
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"

@@ -48,15 +61,21 @@ mime = "0.3.17"
num_cpus = "1.15.0"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.7.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
+puffin = "0.16.0"
+puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
-reqwest = { version = "0.11.16", features = ["rustls-tls", "json"], default-features = false }
+reqwest = { version = "0.11.16", features = [
+    "rustls-tls",
+    "json",
+], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.2", optional = true }

@@ -70,7 +89,12 @@ sysinfo = "0.28.4"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
-time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+time = { version = "0.3.20", features = [
+    "serde-well-known",
+    "formatting",
+    "parsing",
+    "macros",
+] }
tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.12"
toml = "0.7.3"

@@ -89,7 +113,7 @@ brotli = "3.3.4"
insta = "1.29.0"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
-meili-snap = {path = "../meili-snap"}
+meili-snap = { path = "../meili-snap" }
temp-env = "0.3.3"
urlencoding = "2.1.2"
yaup = "0.2.1"

@@ -98,7 +122,10 @@ yaup = "0.2.1"
anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true }
-reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
+reqwest = { version = "0.11.16", features = [
+    "blocking",
+    "rustls-tls",
+], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.5.0", optional = true }

@@ -108,7 +135,18 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
-mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
+profile-with-puffin = ["dep:puffin_http"]
+mini-dashboard = [
+    "actix-web-static-files",
+    "static-files",
+    "anyhow",
+    "cargo_toml",
+    "hex",
+    "reqwest",
+    "sha-1",
+    "tempfile",
+    "zip",
+]
chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
    pub fn succeed(&mut self) {}
}

#[derive(Default)]
pub struct FacetSearchAggregator;

#[allow(dead_code)]
impl FacetSearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self::default()
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

impl MockAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {

@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
    fn get_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
    fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
    fn add_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;

// if we are in release mode and the feature analytics was enabled
// we use the real analytics

@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;

/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.

@@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send {
    /// This method should be called to aggregate a post array of searches
    fn post_multi_search(&self, aggregate: MultiSearchAggregator);

    /// This method should be called to aggregate post facet values searches
    fn post_facet_search(&self, aggregate: FacetSearchAggregator);

    // this method should be called to aggregate a add documents request
    fn add_documents(
        &self,
@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

@@ -29,11 +30,13 @@ use super::{
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
-    SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
+    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;

@@ -71,6 +74,7 @@ pub enum AnalyticsMsg {
    AggregateGetSearch(SearchAggregator),
    AggregatePostSearch(SearchAggregator),
    AggregatePostMultiSearch(MultiSearchAggregator),
    AggregatePostFacetSearch(FacetSearchAggregator),
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),

@@ -139,6 +143,7 @@ impl SegmentAnalytics {
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            post_multi_search_aggregator: MultiSearchAggregator::default(),
            post_facet_search_aggregator: FacetSearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),

@@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
    }

    fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
    }

    fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
    }

@@ -354,6 +363,7 @@ pub struct Segment {
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    post_multi_search_aggregator: MultiSearchAggregator,
    post_facet_search_aggregator: FacetSearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,

@@ -418,6 +428,7 @@ impl Segment {
            Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
            Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -461,55 +472,74 @@ impl Segment {
            })
            .await;
        }
-       let get_search = std::mem::take(&mut self.get_search_aggregator)
-           .into_event(&self.user, "Documents Searched GET");
-       let post_search = std::mem::take(&mut self.post_search_aggregator)
-           .into_event(&self.user, "Documents Searched POST");
-       let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
-           .into_event(&self.user, "Documents Searched by Multi-Search POST");
-       let add_documents = std::mem::take(&mut self.add_documents_aggregator)
-           .into_event(&self.user, "Documents Added");
-       let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
-           .into_event(&self.user, "Documents Deleted");
-       let update_documents = std::mem::take(&mut self.update_documents_aggregator)
-           .into_event(&self.user, "Documents Updated");
-       let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
-           .into_event(&self.user, "Documents Fetched GET");
-       let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
-           .into_event(&self.user, "Documents Fetched POST");
-       let get_tasks =
-           std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
-       let health =
-           std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
-
-       if let Some(get_search) = get_search {
+       let Segment {
+           inbox: _,
+           opt: _,
+           batcher: _,
+           user,
+           get_search_aggregator,
+           post_search_aggregator,
+           post_multi_search_aggregator,
+           post_facet_search_aggregator,
+           add_documents_aggregator,
+           delete_documents_aggregator,
+           update_documents_aggregator,
+           get_fetch_documents_aggregator,
+           post_fetch_documents_aggregator,
+           get_tasks_aggregator,
+           health_aggregator,
+       } = self;
+
+       if let Some(get_search) =
+           take(get_search_aggregator).into_event(&user, "Documents Searched GET")
+       {
            let _ = self.batcher.push(get_search).await;
        }
-       if let Some(post_search) = post_search {
+       if let Some(post_search) =
+           take(post_search_aggregator).into_event(&user, "Documents Searched POST")
+       {
            let _ = self.batcher.push(post_search).await;
        }
-       if let Some(post_multi_search) = post_multi_search {
+       if let Some(post_multi_search) = take(post_multi_search_aggregator)
+           .into_event(&user, "Documents Searched by Multi-Search POST")
+       {
            let _ = self.batcher.push(post_multi_search).await;
        }
-       if let Some(add_documents) = add_documents {
+       if let Some(post_facet_search) =
+           take(post_facet_search_aggregator).into_event(&user, "Facet Searched POST")
+       {
+           let _ = self.batcher.push(post_facet_search).await;
+       }
+       if let Some(add_documents) =
+           take(add_documents_aggregator).into_event(&user, "Documents Added")
+       {
            let _ = self.batcher.push(add_documents).await;
        }
-       if let Some(delete_documents) = delete_documents {
+       if let Some(delete_documents) =
+           take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
+       {
            let _ = self.batcher.push(delete_documents).await;
        }
-       if let Some(update_documents) = update_documents {
+       if let Some(update_documents) =
+           take(update_documents_aggregator).into_event(&user, "Documents Updated")
+       {
            let _ = self.batcher.push(update_documents).await;
        }
-       if let Some(get_fetch_documents) = get_fetch_documents {
+       if let Some(get_fetch_documents) =
+           take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET")
+       {
            let _ = self.batcher.push(get_fetch_documents).await;
        }
-       if let Some(post_fetch_documents) = post_fetch_documents {
+       if let Some(post_fetch_documents) =
+           take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST")
+       {
            let _ = self.batcher.push(post_fetch_documents).await;
        }
-       if let Some(get_tasks) = get_tasks {
+       if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
            let _ = self.batcher.push(get_tasks).await;
        }
-       if let Some(health) = health {
+       if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
            let _ = self.batcher.push(health).await;
        }
        let _ = self.batcher.flush().await;
@@ -548,6 +578,10 @@ pub struct SearchAggregator {
    // The maximum number of terms in a q request
    max_terms_number: usize,

    // vector
    // The maximum number of floats in a vector request
    max_vector_size: usize,

    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,

@@ -569,6 +603,10 @@ pub struct SearchAggregator {
    // facets
    facets_sum_of_terms: usize,
    facets_total_number_of_facets: usize,

    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
}

impl SearchAggregator {

@@ -613,6 +651,10 @@ impl SearchAggregator {
            ret.max_terms_number = q.split_whitespace().count();
        }

        if let Some(ref vector) = query.vector {
            ret.max_vector_size = vector.len();
        }

        if query.is_finite_pagination() {
            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            ret.max_limit = limit;

@@ -632,6 +674,9 @@ impl SearchAggregator {
        ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH();
        ret.show_matches_position = query.show_matches_position;

        ret.show_ranking_score = query.show_ranking_score;
        ret.show_ranking_score_details = query.show_ranking_score_details;

        ret
    }

@@ -706,6 +751,10 @@ impl SearchAggregator {
            let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
            *matching_strategy = matching_strategy.saturating_add(value);
        }

        // scoring
        self.show_ranking_score |= other.show_ranking_score;
        self.show_ranking_score_details |= other.show_ranking_score_details;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {

@@ -760,7 +809,11 @@ impl SearchAggregator {
            },
            "matching_strategy": {
                "most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
-           }
+           },
+           "scoring": {
+               "show_ranking_score": self.show_ranking_score,
+               "show_ranking_score_details": self.show_ranking_score_details,
+           },
        });

        Some(Track {
@@ -886,6 +939,120 @@ impl MultiSearchAggregator {
        }
    }

#[derive(Default)]
pub struct FacetSearchAggregator {
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    time_spent: BinaryHeap<usize>,

    // The set of all facetNames that were used
    facet_names: HashSet<String>,

    // Has there been any other parameter than the facetName or facetQuery ones?
    additional_search_parameters_provided: bool,
}

impl FacetSearchAggregator {
    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
        let FacetSearchQuery {
            facet_query: _,
            facet_name,
            vector,
            q,
            filter,
            matching_strategy,
            attributes_to_search_on,
        } = query;

        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();
        ret.facet_names = Some(facet_name.clone()).into_iter().collect();

        ret.additional_search_parameters_provided = q.is_some()
            || vector.is_some()
            || filter.is_some()
            || *matching_strategy != MatchingStrategy::default()
            || attributes_to_search_on.is_some();

        ret
    }

    pub fn succeed(&mut self, result: &FacetSearchResult) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(result.processing_time_ms as usize);
    }

    /// Aggregate one [FacetSearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // context
        for user_agent in other.user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
        self.time_spent.append(&mut other.time_spent);

        // facet_names
        for facet_name in other.facet_names.into_iter() {
            self.facet_names.insert(facet_name);
        }

        // additional_search_parameters_provided
        self.additional_search_parameters_provided = self.additional_search_parameters_provided
            | other.additional_search_parameters_provided;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if self.total_received == 0 {
            None
        } else {
            // the index of the 99th percentile value
            let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
            // we get all the values in a sorted manner
            let time_spent = self.time_spent.into_sorted_vec();
            // We are only interested in the slowest value of the 99% fastest results
            let time_spent = time_spent.get(percentile_99th as usize);

            let properties = json!({
                "user-agent": self.user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": self.total_succeeded,
                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic
                    "total_received": self.total_received,
                },
                "facets": {
                    "total_distinct_facet_count": self.facet_names.len(),
                    "additional_search_parameters_provided": self.additional_search_parameters_provided,
                },
            });

            Some(Track {
                timestamp: self.timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}
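A worked instance of the percentile arithmetic in `into_event` above: with 200 succeeded requests, the index is 0.99 * (200 - 1) + 1 = 198.01, and the `as usize` cast truncates it to 198, so the 198th entry of the sorted response times is reported:

    let total_succeeded = 200_f64;
    let percentile_99th = 0.99 * (total_succeeded - 1.) + 1.;
    assert_eq!(percentile_99th as usize, 198); // truncation, not rounding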

#[derive(Default)]
pub struct DocumentsAggregator {
    timestamp: Option<OffsetDateTime>,
@@ -71,3 +71,40 @@ impl Stream for Payload {
        }
    }
}

#[cfg(test)]
mod tests {
    use actix_http::encoding::Decoder as Decompress;
    use actix_http::BoxedPayloadStream;
    use bytes::Bytes;
    use futures_util::StreamExt;
    use meili_snap::snapshot;

    use super::*;

    #[actix_rt::test]
    async fn payload_to_large() {
        let stream = futures::stream::iter(vec![
            Ok(Bytes::from("1")),
            Ok(Bytes::from("2")),
            Ok(Bytes::from("3")),
            Ok(Bytes::from("4")),
        ]);
        let boxed_stream: BoxedPayloadStream = Box::pin(stream);
        let actix_payload = dev::Payload::from(boxed_stream);

        let payload = Payload {
            limit: 3,
            remaining: 3,
            payload: Decompress::new(actix_payload, actix_http::ContentEncoding::Identity),
        };

        let mut enumerated_payload_stream = payload.enumerate();

        while let Some((idx, chunk)) = enumerated_payload_stream.next().await {
            if idx == 3 {
                snapshot!(chunk.unwrap_err(), @"The provided payload reached the size limit. The maximum accepted payload size is 3 B.");
            }
        }
    }
}
@@ -111,7 +111,7 @@ pub fn create_app(
                analytics.clone(),
            )
        })
-       .configure(|cfg| routes::configure(cfg, opt.experimental_enable_metrics))
+       .configure(routes::configure)
        .configure(|s| dashboard(s, enable_dashboard));

    let app = app.wrap(actix_web::middleware::Condition::new(

@@ -221,6 +221,7 @@ fn open_or_create_database_unchecked(
    // we don't want to create anything in the data.ms yet, thus we
    // wrap our two builders in a closure that'll be executed later.
    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
    let instance_features = opt.to_instance_features();
    let index_scheduler_builder = || -> anyhow::Result<_> {
        Ok(IndexScheduler::new(IndexSchedulerOptions {
            version_file_path: opt.db_path.join(VERSION_FILE_NAME),

@@ -238,6 +239,7 @@ fn open_or_create_database_unchecked(
            max_number_of_tasks: 1_000_000,
            index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
            index_count: DEFAULT_INDEX_COUNT,
            instance_features,
        })?)
    };
@@ -307,12 +309,16 @@ fn import_dump(
        keys.push(key);
    }

+   // 3. Import the runtime features.
+   let features = dump_reader.features()?.unwrap_or_default();
+   index_scheduler.put_runtime_features(features)?;
+
    let indexer_config = index_scheduler.indexer_config();

    // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
    // try to process tasks while we're trying to import the indexes.

-   // 3. Import the indexes.
+   // 4. Import the indexes.
    for index_reader in dump_reader.indexes()? {
        let mut index_reader = index_reader?;
        let metadata = index_reader.metadata();

@@ -324,19 +330,19 @@ fn import_dump(
        let mut wtxn = index.write_txn()?;

        let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
-       // 3.1 Import the primary key if there is one.
+       // 4.1 Import the primary key if there is one.
        if let Some(ref primary_key) = metadata.primary_key {
            builder.set_primary_key(primary_key.to_string());
        }

-       // 3.2 Import the settings.
+       // 4.2 Import the settings.
        log::info!("Importing the settings.");
        let settings = index_reader.settings()?;
        apply_settings_to_builder(&settings, &mut builder);
        builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;

-       // 3.3 Import the documents.
-       // 3.3.1 We need to recreate the grenad+obkv format accepted by the index.
+       // 4.3 Import the documents.
+       // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
        log::info!("Importing the documents.");
        let file = tempfile::tempfile()?;
        let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));

@@ -347,7 +353,7 @@ fn import_dump(
        // This flush the content of the batch builder.
        let file = builder.into_inner()?.into_inner()?;

-       // 3.3.2 We feed it to the milli index.
+       // 4.3.2 We feed it to the milli index.
        let reader = BufReader::new(file);
        let reader = DocumentsBatchReader::from_reader(reader)?;

@@ -372,7 +378,7 @@ fn import_dump(

    let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;

-   // 4. Import the tasks.
+   // 5. Import the tasks.
    for ret in dump_reader.tasks()? {
        let (task, file) = ret?;
        index_scheduler_dump.register_dumped_task(task, file)?;
@@ -29,6 +29,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

    #[cfg(feature = "profile-with-puffin")]
    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"

@@ -186,9 +190,10 @@ Anonymous telemetry:\t\"Enabled\""
    }

    eprintln!();
-   eprintln!("Documentation:\t\thttps://www.meilisearch.com/docs");
-   eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
-   eprintln!("Discord:\t\thttps://discord.meilisearch.com");
+   eprintln!("Check out Meilisearch Cloud!\thttps://cloud.meilisearch.com/login?utm_campaign=oss&utm_source=engine&utm_medium=cli");
+   eprintln!("Documentation:\t\t\thttps://www.meilisearch.com/docs");
+   eprintln!("Source code:\t\t\thttps://github.com/meilisearch/meilisearch");
+   eprintln!("Discord:\t\t\thttps://discord.meilisearch.com");
    eprintln!();
}
@@ -16,7 +16,7 @@ fn create_buckets() -> [f64; 29] {
 }

 lazy_static! {
-    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
+    pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
     pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
         opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
         &["method", "path"]
@@ -39,10 +39,10 @@ lazy_static! {
     )
     .expect("Can't create a metric");
     pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
-        "http_response_time_seconds",
-        "HTTP response times",
+        "meilisearch_http_response_time_seconds",
+        "Meilisearch HTTP response times",
         &["method", "path"],
-        HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
+        MEILISEARCH_HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
     )
     .expect("Can't create a metric");
     pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
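Note (not part of the diff): the renames above move every exported metric under a `meilisearch_` prefix. A small standalone sketch of the same registration pattern, assuming the `prometheus` and `lazy_static` crates as used in this file; the metric itself is illustrative:

use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    static ref DEMO_HISTOGRAM: HistogramVec = register_histogram_vec!(
        "meilisearch_demo_seconds",  // prefixed name: dashboards can select meilisearch_*
        "An illustrative histogram", // help text
        &["method", "path"]          // label names
    )
    .expect("Can't create a metric");
}

fn main() {
    DEMO_HISTOGRAM.with_label_values(&["GET", "/health"]).observe(0.003);
    println!("{:?}", DEMO_HISTOGRAM.with_label_values(&["GET", "/health"]).get_sample_count());
}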
@@ -12,6 +12,7 @@ use std::{env, fmt, fs};

 use byte_unit::{Byte, ByteError};
 use clap::Parser;
+use meilisearch_types::features::InstanceTogglableFeatures;
 use meilisearch_types::milli::update::IndexerConfig;
 use rustls::server::{
     AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
@@ -486,6 +487,10 @@ impl Opt {
             Ok(None)
         }
     }
+
+    pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
+        InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
+    }
 }

 #[derive(Debug, Default, Clone, Parser, Deserialize)]

meilisearch/src/routes/features.rs (new file, 70 lines)
@@ -0,0 +1,70 @@
+use actix_web::web::{self, Data};
+use actix_web::{HttpRequest, HttpResponse};
+use deserr::actix_web::AwebJson;
+use deserr::Deserr;
+use index_scheduler::IndexScheduler;
+use log::debug;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::actions;
+use serde_json::json;
+
+use crate::analytics::Analytics;
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::GuardedData;
+use crate::extractors::sequential_extractor::SeqHandler;
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(
+        web::resource("")
+            .route(web::get().to(SeqHandler(get_features)))
+            .route(web::patch().to(SeqHandler(patch_features))),
+    );
+}
+
+async fn get_features(
+    index_scheduler: GuardedData<
+        ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
+        Data<IndexScheduler>,
+    >,
+    req: HttpRequest,
+    analytics: Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let features = index_scheduler.features()?;
+
+    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
+    debug!("returns: {:?}", features.runtime_features());
+    Ok(HttpResponse::Ok().json(features.runtime_features()))
+}
+
+#[derive(Debug, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct RuntimeTogglableFeatures {
+    #[deserr(default)]
+    pub score_details: Option<bool>,
+    #[deserr(default)]
+    pub vector_store: Option<bool>,
+}
+
+async fn patch_features(
+    index_scheduler: GuardedData<
+        ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
+        Data<IndexScheduler>,
+    >,
+    new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let features = index_scheduler.features()?;
+
+    let old_features = features.runtime_features();
+
+    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
+        score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
+        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
+    };
+
+    analytics.publish("Experimental features Updated".to_string(), json!(new_features), Some(&req));
+    index_scheduler.put_runtime_features(new_features)?;
+    Ok(HttpResponse::Ok().json(new_features))
+}
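A minimal usage sketch (not part of the diff): exercising the new `/experimental-features` route over HTTP. It assumes the `reqwest` crate with the "blocking" and "json" features plus `serde_json`; the base URL and API key are placeholders. A missing field keeps its previous value, mirroring the `unwrap_or(old_features.score_details)` logic in `patch_features`.

use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::blocking::Client::new();
    let res = client
        .patch("http://localhost:7700/experimental-features") // placeholder URL
        .header("Authorization", "Bearer MASTER_KEY")          // placeholder key
        .json(&json!({ "scoreDetails": true }))                // vectorStore left unchanged
        .send()?;
    println!("{}", res.text()?);
    Ok(())
}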
meilisearch/src/routes/indexes/facet_search.rs (new file, 124 lines)
@@ -0,0 +1,124 @@
+use actix_web::web::Data;
+use actix_web::{web, HttpRequest, HttpResponse};
+use deserr::actix_web::AwebJson;
+use index_scheduler::IndexScheduler;
+use log::debug;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::index_uid::IndexUid;
+use serde_json::Value;
+
+use crate::analytics::{Analytics, FacetSearchAggregator};
+use crate::extractors::authentication::policies::*;
+use crate::extractors::authentication::GuardedData;
+use crate::search::{
+    add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+};
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(web::resource("").route(web::post().to(search)));
+}
+
+/// # Important
+///
+/// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user
+#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase)]
+pub struct FacetSearchQuery {
+    #[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
+    pub facet_query: Option<String>,
+    #[deserr(error = DeserrJsonError<InvalidFacetSearchFacetName>, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)]
+    pub facet_name: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub q: Option<String>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
+    pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
+    pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
+    pub matching_strategy: MatchingStrategy,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
+    pub attributes_to_search_on: Option<Vec<String>>,
+}
+
+pub async fn search(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+    params: AwebJson<FacetSearchQuery, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
+
+    let query = params.into_inner();
+    debug!("facet search called with params: {:?}", query);
+
+    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
+
+    let facet_query = query.facet_query.clone();
+    let facet_name = query.facet_name.clone();
+    let mut search_query = SearchQuery::from(query);
+
+    // Tenant token search_rules.
+    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
+        add_search_rules(&mut search_query, search_rules);
+    }
+
+    let index = index_scheduler.index(&index_uid)?;
+    let features = index_scheduler.features()?;
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_facet_search(&index, search_query, facet_query, facet_name, features)
+    })
+    .await?;
+
+    if let Ok(ref search_result) = search_result {
+        aggregate.succeed(search_result);
+    }
+    analytics.post_facet_search(aggregate);
+
+    let search_result = search_result?;
+
+    debug!("returns: {:?}", search_result);
+    Ok(HttpResponse::Ok().json(search_result))
+}
+
+impl From<FacetSearchQuery> for SearchQuery {
+    fn from(value: FacetSearchQuery) -> Self {
+        let FacetSearchQuery {
+            facet_query: _,
+            facet_name: _,
+            q,
+            vector,
+            filter,
+            matching_strategy,
+            attributes_to_search_on,
+        } = value;
+
+        SearchQuery {
+            q,
+            offset: DEFAULT_SEARCH_OFFSET(),
+            limit: DEFAULT_SEARCH_LIMIT(),
+            page: None,
+            hits_per_page: None,
+            attributes_to_retrieve: None,
+            attributes_to_crop: None,
+            crop_length: DEFAULT_CROP_LENGTH(),
+            attributes_to_highlight: None,
+            show_matches_position: false,
+            show_ranking_score: false,
+            show_ranking_score_details: false,
+            filter,
+            sort: None,
+            facets: None,
+            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
+            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
+            crop_marker: DEFAULT_CROP_MARKER(),
+            matching_strategy,
+            vector,
+            attributes_to_search_on,
+        }
+    }
+}
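A request sketch for the new endpoint (not part of the diff; assumes an index `movies` with `genres` filterable). Only `facetName` is required; every search field that `FacetSearchQuery` leaves out falls back to the defaults set by the `From<FacetSearchQuery>` impl above.

// POST /indexes/movies/facet-search
use serde_json::json;

fn main() {
    let payload = json!({
        "facetName": "genres",  // required: which facet to search values of
        "facetQuery": "a",      // optional prefix matched against facet values
        "q": "captain"          // optional full-text query restricting candidates
    });
    println!("{payload}");
}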
@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;

 pub mod documents;
+pub mod facet_search;
 pub mod search;
 pub mod settings;

@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
             .service(web::scope("/documents").configure(documents::configure))
             .service(web::scope("/search").configure(search::configure))
+            .service(web::scope("/facet-search").configure(facet_search::configure))
             .service(web::scope("/settings").configure(settings::configure)),
     );
 }

@@ -34,6 +34,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 pub struct SearchQueryGet {
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
     q: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchVector>)]
+    vector: Option<Vec<f32>>,
     #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
     offset: Param<usize>,
     #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
@@ -56,6 +58,10 @@ pub struct SearchQueryGet {
     sort: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
     show_matches_position: Param<bool>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
+    show_ranking_score: Param<bool>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
+    show_ranking_score_details: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
     facets: Option<CS<String>>,
     #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
@@ -66,6 +72,8 @@ pub struct SearchQueryGet {
     crop_marker: String,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
     matching_strategy: MatchingStrategy,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
+    pub attributes_to_search_on: Option<CS<String>>,
 }

 impl From<SearchQueryGet> for SearchQuery {
@@ -80,6 +88,7 @@ impl From<SearchQueryGet> for SearchQuery {

         Self {
             q: other.q,
+            vector: other.vector,
             offset: other.offset.0,
             limit: other.limit.0,
             page: other.page.as_deref().copied(),
@@ -91,11 +100,14 @@ impl From<SearchQueryGet> for SearchQuery {
             filter,
             sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
             show_matches_position: other.show_matches_position.0,
+            show_ranking_score: other.show_ranking_score.0,
+            show_ranking_score_details: other.show_ranking_score_details.0,
             facets: other.facets.map(|o| o.into_iter().collect()),
             highlight_pre_tag: other.highlight_pre_tag,
             highlight_post_tag: other.highlight_post_tag,
             crop_marker: other.crop_marker,
             matching_strategy: other.matching_strategy,
+            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
         }
     }
 }
@@ -145,7 +157,9 @@ pub async fn search_with_url_query(
     let mut aggregate = SearchAggregator::from_query(&query, &req);

     let index = index_scheduler.index(&index_uid)?;
-    let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
+    let features = index_scheduler.features()?;
+    let search_result =
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
@@ -177,7 +191,10 @@ pub async fn search_with_post(
     let mut aggregate = SearchAggregator::from_query(&query, &req);

     let index = index_scheduler.index(&index_uid)?;
-    let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
+    let features = index_scheduler.features()?;
+    let search_result =
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;

     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
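Both handlers now follow the same pattern: the read-only feature flags are fetched on the async thread, then moved into the blocking closure alongside the query. A standalone sketch of that pattern, assuming the `tokio` crate with the "rt-multi-thread" and "macros" features; the `RoFeatures` struct below is a stand-in, not the real index-scheduler type:

#[derive(Clone, Copy, Debug)]
struct RoFeatures {
    score_details: bool,
}

#[tokio::main]
async fn main() {
    let features = RoFeatures { score_details: true }; // snapshot taken before blocking
    let result = tokio::task::spawn_blocking(move || {
        // CPU-bound search work runs off the async executor; the flags moved in.
        format!("searching with {features:?}")
    })
    .await
    .expect("blocking task panicked");
    println!("{result}");
}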
@@ -309,6 +309,81 @@ make_setting_route!(
     }
 );

+make_setting_route!(
+    "/non-separator-tokens",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
+    >,
+    non_separator_tokens,
+    "nonSeparatorTokens",
+    analytics,
+    |non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "nonSeparatorTokens Updated".to_string(),
+            json!({
+                "non_separator_tokens": {
+                    "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
+make_setting_route!(
+    "/separator-tokens",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
+    >,
+    separator_tokens,
+    "separatorTokens",
+    analytics,
+    |separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "separatorTokens Updated".to_string(),
+            json!({
+                "separator_tokens": {
+                    "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
+make_setting_route!(
+    "/dictionary",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
+    >,
+    dictionary,
+    "dictionary",
+    analytics,
+    |dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "dictionary Updated".to_string(),
+            json!({
+                "dictionary": {
+                    "total": dictionary.as_ref().map(|dictionary| dictionary.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
 make_setting_route!(
     "/synonyms",
     put,
@@ -401,12 +476,17 @@ make_setting_route!(
     analytics,
     |setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
         use serde_json::json;
+        use meilisearch_types::facet_values_sort::FacetValuesSort;

         analytics.publish(
             "Faceting Updated".to_string(),
             json!({
                 "faceting": {
                     "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
+                    "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| {
+                        s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
+                    }),
+                    "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
                 },
             }),
             Some(req),
@@ -545,6 +625,10 @@ pub async fn update_all(
                     .as_ref()
                     .set()
                     .and_then(|s| s.max_values_per_facet.as_ref().set()),
+                "sort_facet_values_by": new_settings.faceting
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.sort_facet_values_by.as_ref().set()),
             },
             "pagination": {
                 "max_total_hits": new_settings.pagination
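A payload sketch for the new tokenizer settings (not part of the diff; assumes standard Meilisearch settings semantics, and the words below are illustrative only). Each route accepts a JSON array that deserializes into the `BTreeSet<String>` declared above:

// PUT /indexes/{index_uid}/settings/dictionary
// PUT /indexes/{index_uid}/settings/separator-tokens
use serde_json::json;

fn main() {
    // Teach the tokenizer multi-word units it should keep whole.
    let dictionary = json!(["J. R. R.", "W. E. B."]);
    // Characters that should additionally split words.
    let separators = json!(["|", "&"]);
    println!("{dictionary} {separators}");
}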
@@ -19,6 +19,7 @@ pub async fn get_metrics(
     index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
     auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
+    index_scheduler.features()?.check_metrics()?;
     let auth_filters = index_scheduler.filters();
     if !auth_filters.all_indexes_authorized() {
         let mut error = ResponseError::from(AuthenticationError::InvalidToken);
@@ -20,13 +20,14 @@ const PAGINATION_DEFAULT_LIMIT: usize = 20;

 mod api_key;
 mod dump;
+pub mod features;
 pub mod indexes;
 mod metrics;
 mod multi_search;
 mod swap_indexes;
 pub mod tasks;

-pub fn configure(cfg: &mut web::ServiceConfig, enable_metrics: bool) {
+pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::scope("/tasks").configure(tasks::configure))
         .service(web::resource("/health").route(web::get().to(get_health)))
         .service(web::scope("/keys").configure(api_key::configure))
@@ -35,11 +36,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         .service(web::resource("/version").route(web::get().to(get_version)))
         .service(web::scope("/indexes").configure(indexes::configure))
         .service(web::scope("/multi-search").configure(multi_search::configure))
-        .service(web::scope("/swap-indexes").configure(swap_indexes::configure));
-
-    if enable_metrics {
-        cfg.service(web::scope("/metrics").configure(metrics::configure));
-    }
+        .service(web::scope("/swap-indexes").configure(swap_indexes::configure))
+        .service(web::scope("/metrics").configure(metrics::configure))
+        .service(web::scope("/experimental-features").configure(features::configure));
 }

 #[derive(Debug, Serialize)]

@@ -41,6 +41,7 @@ pub async fn multi_search_with_post(
     let queries = params.into_inner().queries;

     let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
+    let features = index_scheduler.features()?;

     // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
     // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
@@ -74,8 +75,9 @@ pub async fn multi_search_with_post(
                 err
             })
             .with_index(query_index)?;

         let search_result =
-            tokio::task::spawn_blocking(move || perform_search(&index, query))
+            tokio::task::spawn_blocking(move || perform_search(&index, query, features))
                 .await
                 .with_index(query_index)?;

@@ -5,17 +5,26 @@ use std::time::Instant;

 use deserr::Deserr;
 use either::Either;
+use index_scheduler::RoFeatures;
+use indexmap::IndexMap;
+use log::warn;
 use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
+use meilisearch_types::milli::{
+    dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
+};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
     AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
-    SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+    SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
 };
+use ordered_float::OrderedFloat;
 use regex::Regex;
 use serde::Serialize;
 use serde_json::{json, Value};
@@ -31,11 +40,13 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

-#[derive(Debug, Clone, Default, PartialEq, Eq, Deserr)]
+#[derive(Debug, Clone, Default, PartialEq, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQuery {
     #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
     pub q: Option<String>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
+    pub vector: Option<Vec<f32>>,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
     pub offset: usize,
     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -54,6 +65,10 @@ pub struct SearchQuery {
     pub attributes_to_highlight: Option<HashSet<String>>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
     pub show_matches_position: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
+    pub show_ranking_score: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
+    pub show_ranking_score_details: bool,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
     pub filter: Option<Value>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
@@ -68,6 +83,8 @@ pub struct SearchQuery {
     pub crop_marker: String,
     #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
     pub matching_strategy: MatchingStrategy,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
+    pub attributes_to_search_on: Option<Vec<String>>,
 }

 impl SearchQuery {
@@ -80,13 +97,15 @@ impl SearchQuery {
 // This struct contains the fields of `SearchQuery` inline.
 // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
 // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Clone, PartialEq, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQueryWithIndex {
     #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
     pub index_uid: IndexUid,
     #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
     pub q: Option<String>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub vector: Option<Vec<f32>>,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
     pub offset: usize,
     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -103,6 +122,10 @@ pub struct SearchQueryWithIndex {
     pub crop_length: usize,
     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
     pub attributes_to_highlight: Option<HashSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
+    pub show_ranking_score: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
+    pub show_ranking_score_details: bool,
     #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
     pub show_matches_position: bool,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
@@ -119,6 +142,8 @@ pub struct SearchQueryWithIndex {
     pub crop_marker: String,
     #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
     pub matching_strategy: MatchingStrategy,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
+    pub attributes_to_search_on: Option<Vec<String>>,
 }

 impl SearchQueryWithIndex {
@@ -126,6 +151,7 @@ impl SearchQueryWithIndex {
         let SearchQueryWithIndex {
             index_uid,
             q,
+            vector,
             offset,
             limit,
             page,
@@ -134,6 +160,8 @@ impl SearchQueryWithIndex {
             attributes_to_crop,
             crop_length,
             attributes_to_highlight,
+            show_ranking_score,
+            show_ranking_score_details,
             show_matches_position,
             filter,
             sort,
@@ -142,11 +170,13 @@ impl SearchQueryWithIndex {
             highlight_post_tag,
             crop_marker,
             matching_strategy,
+            attributes_to_search_on,
         } = self;
         (
             index_uid,
             SearchQuery {
                 q,
+                vector,
                 offset,
                 limit,
                 page,
@@ -155,6 +185,8 @@ impl SearchQueryWithIndex {
                 attributes_to_crop,
                 crop_length,
                 attributes_to_highlight,
+                show_ranking_score,
+                show_ranking_score_details,
                 show_matches_position,
                 filter,
                 sort,
@@ -163,6 +195,7 @@ impl SearchQueryWithIndex {
                 highlight_post_tag,
                 crop_marker,
                 matching_strategy,
+                attributes_to_search_on,
                 // do not use ..Default::default() here,
                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
             },
@@ -170,7 +203,7 @@ impl SearchQueryWithIndex {
     }
 }

-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
 #[deserr(rename_all = camelCase)]
 pub enum MatchingStrategy {
     /// Remove query words from last to first
@@ -194,7 +227,27 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
     }
 }

-#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
+#[derive(Debug, Default, Clone, PartialEq, Eq, Deserr)]
+#[deserr(rename_all = camelCase)]
+pub enum FacetValuesSort {
+    /// Facet values are sorted in alphabetical order, ascending from A to Z.
+    #[default]
+    Alpha,
+    /// Facet values are sorted by decreasing count.
+    /// The count is the number of records containing this facet value in the results of the query.
+    Count,
+}
+
+impl From<FacetValuesSort> for OrderBy {
+    fn from(val: FacetValuesSort) -> Self {
+        match val {
+            FacetValuesSort::Alpha => OrderBy::Lexicographic,
+            FacetValuesSort::Count => OrderBy::Count,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, PartialEq)]
 pub struct SearchHit {
     #[serde(flatten)]
     pub document: Document,
@@ -202,6 +255,12 @@ pub struct SearchHit {
     pub formatted: Document,
     #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
     pub matches_position: Option<MatchesPosition>,
+    #[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
+    pub ranking_score: Option<f64>,
+    #[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
+    pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
+    #[serde(rename = "_semanticScore", skip_serializing_if = "Option::is_none")]
+    pub semantic_score: Option<f32>,
 }

 #[derive(Serialize, Debug, Clone, PartialEq)]
@@ -209,11 +268,13 @@ pub struct SearchHit {
 pub struct SearchResult {
     pub hits: Vec<SearchHit>,
     pub query: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub vector: Option<Vec<f32>>,
     pub processing_time_ms: u128,
     #[serde(flatten)]
     pub hits_info: HitsInfo,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
+    pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_stats: Option<BTreeMap<String, FacetStats>>,
 }
@@ -241,6 +302,14 @@ pub struct FacetStats {
     pub max: f64,
 }

+#[derive(Serialize, Debug, Clone, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct FacetSearchResult {
+    pub facet_hits: Vec<FacetValueHit>,
+    pub facet_query: Option<String>,
+    pub processing_time_ms: u128,
+}
+
 /// Incorporate search rules in search query
 pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
     query.filter = match (query.filter.take(), rules.filter) {
@@ -261,28 +330,52 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
     }
 }

-pub fn perform_search(
-    index: &Index,
-    query: SearchQuery,
-) -> Result<SearchResult, MeilisearchHttpError> {
-    let before_search = Instant::now();
-    let rtxn = index.read_txn()?;
-
-    let mut search = index.search(&rtxn);
+fn prepare_search<'t>(
+    index: &'t Index,
+    rtxn: &'t RoTxn,
+    query: &'t SearchQuery,
+    features: RoFeatures,
+) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
+    let mut search = index.search(rtxn);

+    if query.vector.is_some() && query.q.is_some() {
+        warn!("Ignoring the query string `q` when used with the `vector` parameter.");
+    }
+
+    if let Some(ref vector) = query.vector {
+        search.vector(vector.clone());
+    }
+
     if let Some(ref query) = query.q {
         search.query(query);
     }

+    if let Some(ref searchable) = query.attributes_to_search_on {
+        search.searchable_attributes(searchable);
+    }
+
     let is_finite_pagination = query.is_finite_pagination();
     search.terms_matching_strategy(query.matching_strategy.into());

     let max_total_hits = index
-        .pagination_max_total_hits(&rtxn)
+        .pagination_max_total_hits(rtxn)
         .map_err(milli::Error::from)?
         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

     search.exhaustive_number_hits(is_finite_pagination);
+    search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
+        ScoringStrategy::Detailed
+    } else {
+        ScoringStrategy::Skip
+    });
+
+    if query.show_ranking_score_details {
+        features.check_score_details()?;
+    }
+
+    if query.vector.is_some() {
+        features.check_vector()?;
+    }

     // compute the offset on the limit depending on the pagination mode.
     let (offset, limit) = if is_finite_pagination {
@@ -320,7 +413,22 @@ pub fn perform_search(
         search.sort_criteria(sort);
     }

-    let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
+    Ok((search, is_finite_pagination, max_total_hits, offset))
+}
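The elided body above derives `(offset, limit)` from the pagination mode. A hedged sketch of the finite-pagination arithmetic, assuming it mirrors Meilisearch's page/hitsPerPage semantics; it is not copied from the diff:

fn offset_and_limit(page: usize, hits_per_page: usize, max_total_hits: usize) -> (usize, usize) {
    // Pages are 1-based; page 1 starts at offset 0.
    let offset = hits_per_page.saturating_mul(page.saturating_sub(1));
    // Never read past the configured ceiling (`max_total_hits`).
    let limit = hits_per_page.min(max_total_hits.saturating_sub(offset));
    (offset, limit)
}

fn main() {
    assert_eq!(offset_and_limit(3, 20, 1000), (40, 20));
    assert_eq!(offset_and_limit(51, 20, 1000), (1000, 0)); // clamped at the ceiling
    println!("ok");
}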
+
+pub fn perform_search(
+    index: &Index,
+    query: SearchQuery,
+    features: RoFeatures,
+) -> Result<SearchResult, MeilisearchHttpError> {
+    let before_search = Instant::now();
+    let rtxn = index.read_txn()?;
+
+    let (search, is_finite_pagination, max_total_hits, offset) =
+        prepare_search(index, &rtxn, &query, features)?;
+
+    let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
+        search.execute()?;

     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

@@ -383,16 +491,29 @@ pub fn perform_search(
         tokenizer_builder.allow_list(&script_lang_map);
     }

+    let separators = index.allowed_separators(&rtxn)?;
+    let separators: Option<Vec<_>> =
+        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
+    if let Some(ref separators) = separators {
+        tokenizer_builder.separators(separators);
+    }
+
+    let dictionary = index.dictionary(&rtxn)?;
+    let dictionary: Option<Vec<_>> =
+        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
+    if let Some(ref dictionary) = dictionary {
+        tokenizer_builder.words_dict(dictionary);
+    }
+
     let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
     formatter_builder.crop_marker(query.crop_marker);
     formatter_builder.highlight_prefix(query.highlight_pre_tag);
     formatter_builder.highlight_suffix(query.highlight_post_tag);

     let mut documents = Vec::new();

     let documents_iter = index.documents(&rtxn, documents_ids)?;

-    for (_id, obkv) in documents_iter {
+    for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
         // First generate a document with all the displayed fields
         let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

@@ -416,7 +537,27 @@ pub fn perform_search(
             insert_geo_distance(sort, &mut document);
         }

-        let hit = SearchHit { document, formatted, matches_position };
+        let semantic_score = match query.vector.as_ref() {
+            Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
+                Some(vectors) => compute_semantic_score(vector, vectors)?,
+                None => None,
+            },
+            None => None,
+        };
+
+        let ranking_score =
+            query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
+        let ranking_score_details =
+            query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
+
+        let hit = SearchHit {
+            document,
+            formatted,
+            matches_position,
+            ranking_score_details,
+            ranking_score,
+            semantic_score,
+        };
         documents.push(hit);
     }

@@ -448,10 +589,30 @@ pub fn perform_search(
                 .unwrap_or(DEFAULT_VALUES_PER_FACET);
             facet_distribution.max_values_per_facet(max_values_by_facet);

+            let sort_facet_values_by =
+                index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
+            let default_sort_facet_values_by =
+                sort_facet_values_by.get("*").copied().unwrap_or_default();
+
             if fields.iter().all(|f| f != "*") {
+                let fields: Vec<_> = fields
+                    .iter()
+                    .map(|n| {
+                        (
+                            n,
+                            sort_facet_values_by
+                                .get(n)
+                                .copied()
+                                .unwrap_or(default_sort_facet_values_by),
+                        )
+                    })
+                    .collect();
                 facet_distribution.facets(fields);
             }
-            let distribution = facet_distribution.candidates(candidates).execute()?;
+            let distribution = facet_distribution
+                .candidates(candidates)
+                .default_order_by(default_sort_facet_values_by)
+                .execute()?;
             let stats = facet_distribution.compute_stats()?;
             (Some(distribution), Some(stats))
         }
@@ -465,7 +626,8 @@ pub fn perform_search(
     let result = SearchResult {
         hits: documents,
         hits_info,
-        query: query.q.clone().unwrap_or_default(),
+        query: query.q.unwrap_or_default(),
+        vector: query.vector,
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
         facet_stats,
@@ -473,6 +635,29 @@ pub fn perform_search(
     Ok(result)
 }

+pub fn perform_facet_search(
+    index: &Index,
+    search_query: SearchQuery,
+    facet_query: Option<String>,
+    facet_name: String,
+    features: RoFeatures,
+) -> Result<FacetSearchResult, MeilisearchHttpError> {
+    let before_search = Instant::now();
+    let rtxn = index.read_txn()?;
+
+    let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
+    let mut facet_search = SearchForFacetValues::new(facet_name, search);
+    if let Some(facet_query) = &facet_query {
+        facet_search.query(facet_query);
+    }
+
+    Ok(FacetSearchResult {
+        facet_hits: facet_search.execute()?,
+        facet_query,
+        processing_time_ms: before_search.elapsed().as_millis(),
+    })
+}
+
 fn insert_geo_distance(sorts: &[String], document: &mut Document) {
     lazy_static::lazy_static! {
         static ref GEO_REGEX: Regex =
@@ -489,6 +674,17 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
     }
 }

+fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
+    let vectors = serde_json::from_value(vectors)
+        .map(VectorOrArrayOfVectors::into_array_of_vectors)
+        .map_err(InternalError::SerdeJson)?;
+    Ok(vectors
+        .into_iter()
+        .map(|v| OrderedFloat(dot_product_similarity(query, &v)))
+        .max()
+        .map(OrderedFloat::into_inner))
+}
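A worked example of the scoring above (not part of the diff). It assumes `dot_product_similarity` is a plain dot product; the stand-in below is for illustration only. A document may store one vector or several, and the hit keeps the best score, mirroring the `.map(...).max()` chain:

fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

fn main() {
    let query = [1.0_f32, 0.0, 1.0];
    let doc_vectors = vec![vec![0.5_f32, 0.5, 0.0], vec![1.0, 0.0, 0.9]];
    // Score every stored vector, keep the maximum similarity as the hit's score.
    let score = doc_vectors
        .iter()
        .map(|v| dot_product_similarity(&query, v))
        .fold(f32::NEG_INFINITY, f32::max);
    assert!((score - 1.9).abs() < 1e-6); // the second vector wins
    println!("semantic score: {score}");
}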

 fn compute_formatted_options(
     attr_to_highlight: &HashSet<String>,
     attr_to_crop: &[String],
@@ -616,10 +812,26 @@ fn make_document(
     Ok(document)
 }

-fn format_fields<A: AsRef<[u8]>>(
+/// Extract the JSON value under the field name specified
+/// but doesn't support nested objects.
+fn extract_field(
+    field_name: &str,
+    field_ids_map: &FieldsIdsMap,
+    obkv: obkv::KvReaderU16,
+) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
+    match field_ids_map.id(field_name) {
+        Some(fid) => match obkv.get(fid) {
+            Some(value) => Ok(serde_json::from_slice(value).map(Some)?),
+            None => Ok(None),
+        },
+        None => Ok(None),
+    }
+}
+
+fn format_fields<'a>(
     document: &Document,
     field_ids_map: &FieldsIdsMap,
-    builder: &MatcherBuilder<'_, A>,
+    builder: &'a MatcherBuilder<'a>,
     formatted_options: &BTreeMap<FieldId, FormatOptions>,
     compute_matches: bool,
     displayable_ids: &BTreeSet<FieldId>,
@@ -664,9 +876,9 @@ fn format_fields<A: AsRef<[u8]>>(
     Ok((matches_position, document))
 }

-fn format_value<A: AsRef<[u8]>>(
+fn format_value<'a>(
     value: Value,
-    builder: &MatcherBuilder<'_, A>,
+    builder: &'a MatcherBuilder<'a>,
     format_options: Option<FormatOptions>,
     infos: &mut Vec<MatchBounds>,
     compute_matches: bool,

@@ -422,7 +422,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
     meili_snap::snapshot!(code, @"400 Bad Request");
     meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
     {
-      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`",
+      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
       "code": "invalid_api_key_actions",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -90,7 +90,7 @@ async fn create_api_key_bad_actions() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`",
+      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
       "code": "invalid_api_key_actions",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -346,17 +346,24 @@ impl Index<'_> {
         query: Value,
         test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
     ) {
-        let (response, code) = self.search_post(query.clone()).await;
-        let t = test.clone();
-        if let Err(e) = catch_unwind(move || t(response, code)) {
-            eprintln!("Error with post search");
-            resume_unwind(e);
-        }
+        let post = self.search_post(query.clone()).await;

         let query = yaup::to_string(&query).unwrap();
-        let (response, code) = self.search_get(&query).await;
-        if let Err(e) = catch_unwind(move || test(response, code)) {
-            eprintln!("Error with get search");
-            resume_unwind(e);
+        let get = self.search_get(&query).await;
+
+        insta::allow_duplicates! {
+            let (response, code) = post;
+            let t = test.clone();
+            if let Err(e) = catch_unwind(move || t(response, code)) {
+                eprintln!("Error with post search");
+                resume_unwind(e);
+            }
+
+            let (response, code) = get;
+            if let Err(e) = catch_unwind(move || test(response, code)) {
+                eprintln!("Error with get search");
+                resume_unwind(e);
+            }
         }
     }

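A standalone sketch of why the harness wraps both assertions in `insta::allow_duplicates!` (not part of the diff; assumes the `insta` crate as a dev-dependency): the POST and GET paths assert the same inline snapshot twice, which insta rejects unless duplicates are explicitly allowed.

#[test]
fn same_snapshot_asserted_twice() {
    for _ in 0..2 {
        // Without allow_duplicates!, the second pass over this inline
        // snapshot would make insta report a duplicate assertion.
        insta::allow_duplicates! {
            insta::assert_snapshot!("42", @"42");
        }
    }
}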
@@ -370,6 +377,11 @@ impl Index<'_> {
         self.service.get(url).await
     }

+    pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref()));
+        self.service.post_encoded(url, query, self.encoder).await
+    }
+
     pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) {
         let url =
             format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute");

(Diff for one file suppressed because it is too large.)
@@ -963,3 +963,29 @@ async fn sort_unset_ranking_rule() {
         )
         .await;
 }
+
+#[actix_rt::test]
+async fn search_on_unknown_field() {
+    let server = Server::new().await;
+    let index = server.index("test");
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(
+            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
+            |response, code| {
+                snapshot!(code, @"400 Bad Request");
+                snapshot!(json_string!(response), @r###"
+                {
+                  "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
+                  "code": "invalid_search_attributes_to_search_on",
+                  "type": "invalid_request",
+                  "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
+                }
+                "###);
+            },
+        )
+        .await;
+}

meilisearch/tests/search/facet_search.rs (new file, 92 lines)
@@ -0,0 +1,92 @@
+use once_cell::sync::Lazy;
+use serde_json::{json, Value};
+
+use crate::common::Server;
+
+pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+        {
+            "title": "Shazam!",
+            "genres": ["Action", "Adventure"],
+            "id": "287947",
+        },
+        {
+            "title": "Captain Marvel",
+            "genres": ["Action", "Adventure"],
+            "id": "299537",
+        },
+        {
+            "title": "Escape Room",
+            "genres": ["Horror", "Thriller", "Multiple Words"],
+            "id": "522681",
+        },
+        {
+            "title": "How to Train Your Dragon: The Hidden World",
+            "genres": ["Action", "Comedy"],
+            "id": "166428",
+        },
+        {
+            "title": "Gläss",
+            "genres": ["Thriller"],
+            "id": "450465",
+        }
+    ])
+});
+
+#[actix_rt::test]
+async fn simple_facet_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2);
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "adventure"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
+}
+
+#[actix_rt::test]
+async fn non_filterable_facet_search_error() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+    assert_eq!(code, 400, "{}", response);
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "adv"})).await;
+    assert_eq!(code, 400, "{}", response);
+}
+
+#[actix_rt::test]
+async fn facet_search_dont_support_words() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(response["facetHits"].as_array().unwrap().len(), 0);
+}
@ -1,3 +1,4 @@
|
||||
use insta::{allow_duplicates, assert_json_snapshot};
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
@ -18,30 +19,43 @@ async fn formatted_contain_wildcard() {
|
||||
|response, code|
|
||||
{
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "<em>pésti</em>",
|
||||
},
|
||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
||||
})
|
||||
);
|
||||
}
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "<em>pésti</em>"
|
||||
},
|
||||
"_matchesPosition": {
|
||||
"cattos": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
}
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"id": 852,
|
||||
"cattos": "pésti"
|
||||
}
|
||||
"###)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
@ -50,20 +64,29 @@ async fn formatted_contain_wildcard() {
|
||||
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti",
|
||||
},
|
||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
||||
})
|
||||
);
|
||||
}
|
||||
)
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti"
|
||||
},
|
||||
"_matchesPosition": {
|
||||
"cattos": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
"###)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
@ -71,17 +94,20 @@ async fn formatted_contain_wildcard() {
|
||||
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti",
|
||||
}
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@ -89,17 +115,20 @@ async fn formatted_contain_wildcard() {
|
||||
index
|
||||
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti",
|
||||
}
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
"cattos": "pésti"
|
||||
}
|
||||
}
|
||||
"###)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
@ -116,21 +145,24 @@ async fn format_nested() {
|
||||
index
|
||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby",
|
||||
"age": 2,
|
||||
},
|
||||
{
|
||||
"name": "buddy",
|
||||
"age": 4,
|
||||
},
|
||||
],
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby",
|
||||
"age": 2
|
||||
},
|
||||
{
|
||||
"name": "buddy",
|
||||
"age": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
"###)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
@ -139,19 +171,22 @@ async fn format_nested() {
|
||||
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby",
|
||||
},
|
||||
{
|
||||
"name": "buddy",
|
||||
},
|
||||
],
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby"
|
||||
},
|
||||
{
|
||||
"name": "buddy"
|
||||
}
|
||||
]
|
||||
}
|
||||
"###)
|
||||
}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@ -161,20 +196,30 @@ async fn format_nested() {
|
||||
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(
|
||||
response["hits"][0],
|
||||
json!({
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby",
|
||||
},
|
||||
{
|
||||
"name": "buddy",
|
||||
},
|
||||
],
|
||||
"_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]},
|
||||
})
|
||||
);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"doggos": [
|
||||
{
|
||||
"name": "bobby"
|
||||
},
|
||||
{
|
||||
"name": "buddy"
|
||||
}
|
||||
],
|
||||
"_matchesPosition": {
|
||||
"doggos.name": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
"###)
|
||||
}
|
||||
}
|
||||
)
|
||||
.await;
|
||||
@@ -183,21 +228,24 @@ async fn format_nested() {
        .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "_formatted": {
                        "doggos": [
                            {
                                "name": "bobby",
                            },
                            {
                                "name": "buddy",
                            },
                        ],
                    },
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "_formatted": {
                    "doggos": [
                      {
                        "name": "bobby"
                      },
                      {
                        "name": "buddy"
                      }
                    ]
                  }
                }
                "###)
            }
        })
        .await;

@@ -205,21 +253,24 @@ async fn format_nested() {
        .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "_formatted": {
                        "doggos": [
                            {
                                "name": "bobby",
                            },
                            {
                                "name": "buddy",
                            },
                        ],
                    },
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "_formatted": {
                    "doggos": [
                      {
                        "name": "bobby"
                      },
                      {
                        "name": "buddy"
                      }
                    ]
                  }
                }
                "###)
            }
        })
        .await;

@@ -227,55 +278,61 @@ async fn format_nested() {
        .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "doggos": [
                        {
                            "name": "bobby",
                        },
                        {
                            "name": "buddy",
                        },
                    ],
                    "_formatted": {
                        "doggos": [
                            {
                                "name": "bobby",
                                "age": "2",
                            },
                            {
                                "name": "buddy",
                                "age": "4",
                            },
                        ],
                    },
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "doggos": [
                    {
                      "name": "bobby"
                    },
                    {
                      "name": "buddy"
                    }
                  ],
                  "_formatted": {
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": "2"
                      },
                      {
                        "name": "buddy",
                        "age": "4"
                      }
                    ]
                  }
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "_formatted": {
                        "doggos": [
                            {
                                "name": "bobby",
                                "age": "2",
                            },
                            {
                                "name": "buddy",
                                "age": "4",
                            },
                        ],
                    },
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "_formatted": {
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": "2"
                      },
                      {
                        "name": "buddy",
                        "age": "4"
                      }
                    ]
                  }
                }
                "###)
            }
        }
    )
    .await;
@@ -297,54 +354,66 @@ async fn displayedattr_2_smol() {
        .search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                    "_formatted": {
                        "id": "852",
                    }
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852,
                  "_formatted": {
                    "id": "852"
                  }
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "attributesToCrop": ["id"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                    "_formatted": {
                        "id": "852",
                    }
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852,
                  "_formatted": {
                    "id": "852"
                  }
                }
                "###)
            }
        })
        .await;

@@ -353,15 +422,18 @@ async fn displayedattr_2_smol() {
            json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(
                    response["hits"][0],
                    json!({
                        "id": 852,
                        "_formatted": {
                            "id": "852",
                        }
                    })
                );
                allow_duplicates! {
                    assert_json_snapshot!(response["hits"][0],
                    { "._rankingScore" => "[score]" },
                    @r###"
                    {
                      "id": 852,
                      "_formatted": {
                        "id": "852"
                      }
                    }
                    "###)
                }
            },
        )
        .await;
@@ -369,31 +441,41 @@ async fn displayedattr_2_smol() {
    index
        .search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"][0],
                json!({
                    "id": 852,
                })
            );
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @r###"
                {
                  "id": 852
                }
                "###)
            }
        })
        .await;

    index
        .search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(response["hits"][0], json!({}));
            allow_duplicates! {
                assert_json_snapshot!(response["hits"][0],
                { "._rankingScore" => "[score]" },
                @"{}")
            }
        })
        .await;

@@ -402,7 +484,11 @@ async fn displayedattr_2_smol() {
            json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"][0], json!({}));
                allow_duplicates! {
                    assert_json_snapshot!(response["hits"][0],
                    { "._rankingScore" => "[score]" },
                    @"{}")
                }

            }
        )
@@ -413,14 +499,17 @@ async fn displayedattr_2_smol() {
            json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(
                    response["hits"][0],
                    json!({
                        "_formatted": {
                            "id": "852",
                        }
                    })
                );
                allow_duplicates! {
                    assert_json_snapshot!(response["hits"][0],
                    { "._rankingScore" => "[score]" },
                    @r###"
                    {
                      "_formatted": {
                        "id": "852"
                      }
                    }
                    "###)
                }
            },
        )
        .await;
@@ -430,14 +519,17 @@ async fn displayedattr_2_smol() {
            json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(
                    response["hits"][0],
                    json!({
                        "_formatted": {
                            "id": "852",
                        }
                    })
                );
                allow_duplicates! {
                    assert_json_snapshot!(response["hits"][0],
                    { "._rankingScore" => "[score]" },
                    @r###"
                    {
                      "_formatted": {
                        "id": "852"
                      }
                    }
                    "###)
                }
            },
        )
        .await;

@@ -2,9 +2,11 @@
// should be tested in its own module to isolate tests and keep the tests readable.

mod errors;
mod facet_search;
mod formatted;
mod multi;
mod pagination;
mod restrict_searchable;

use once_cell::sync::Lazy;
use serde_json::{json, Value};

@@ -65,7 +65,7 @@ async fn simple_search_single_index() {
        ]}))
        .await;
    snapshot!(code, @"200 OK");
    insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
    insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
    [
      {
        "indexUid": "test",
@@ -170,7 +170,7 @@ async fn simple_search_two_indexes() {
        ]}))
        .await;
    snapshot!(code, @"200 OK");
    insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
    insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
    [
      {
        "indexUid": "test",

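The only change in these two hunks is the extra `.**._rankingScore` redaction. A minimal sketch of how such an insta redaction behaves, assuming insta's `redactions` and `json` features are enabled (the sample JSON values here are made up, not from the diff):

use insta::assert_json_snapshot;
use serde_json::json;

fn main() {
    let results = json!([{ "indexUid": "test", "hits": [{ "id": 1, "_rankingScore": 0.83 }] }]);
    // ".**._rankingScore" matches the key at any nesting depth, so volatile
    // scores are replaced by a stable "[score]" marker in the stored snapshot.
    assert_json_snapshot!(results, { ".**._rankingScore" => "[score]" });
}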
meilisearch/tests/search/restrict_searchable.rs (new file, 267 lines)
@@ -0,0 +1,267 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use serde_json::{json, Value};

use crate::common::index::Index;
use crate::common::Server;

async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
    let index = server.index("test");

    index.add_documents(documents.clone(), None).await;
    index.wait_task(0).await;
    index
}

static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
    {
        "title": "Shazam!",
        "desc": "a Captain Marvel ersatz",
        "id": "1",
    },
    {
        "title": "Captain Planet",
        "desc": "He's not part of the Marvel Cinematic Universe",
        "id": "2",
    },
    {
        "title": "Captain Marvel",
        "desc": "a Shazam ersatz",
        "id": "3",
    }])
});

#[actix_rt::test]
async fn simple_search_on_title() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // simple search should return 2 documents (ids: 2 and 3).
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(response["hits"].as_array().unwrap().len(), @"2");
            },
        )
        .await;
}

#[actix_rt::test]
async fn simple_prefix_search_on_title() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // simple search should return 2 documents (ids: 2 and 3).
    index
        .search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"].as_array().unwrap().len(), @"2");
        })
        .await;
}

#[actix_rt::test]
async fn simple_search_on_title_matching_strategy_all() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    // simple search matching strategy all should only return 1 document (ids: 2).
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"].as_array().unwrap().len(), @"1");
        })
        .await;
}

#[actix_rt::test]
async fn simple_search_on_no_field() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    // simple search on no field shouldn't return any document.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["hits"].as_array().unwrap().len(), @"0");
        })
        .await;
}

#[actix_rt::test]
async fn word_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // Document 3 should appear before document 2.
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]),
                    @r###"
                [
                  {
                    "id": "3"
                  },
                  {
                    "id": "2"
                  }
                ]
                "###
                );
            },
        )
        .await;
}

#[actix_rt::test]
async fn word_ranking_rule_order_exact_words() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
    index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await;
    index.wait_task(1).await;

    // simple search should return 2 documents (ids: 2 and 3).
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]),
                    @r###"
                [
                  {
                    "id": "3"
                  },
                  {
                    "id": "2"
                  }
                ]
                "###
                );
            },
        )
        .await;
}

#[actix_rt::test]
async fn typo_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
        {
            "title": "Capitain Marivel",
            "desc": "Captain Marvel",
            "id": "1",
        },
        {
            "title": "Captain Marivel",
            "desc": "a Shazam ersatz",
            "id": "2",
        }]),
    )
    .await;

    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]),
                @r###"
            [
              {
                "id": "2"
              },
              {
                "id": "1"
              }
            ]
            "###
            );
        })
        .await;
}

#[actix_rt::test]
async fn attributes_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
        {
            "title": "Captain Marvel",
            "desc": "a Shazam ersatz",
            "footer": "The story of Captain Marvel",
            "id": "1",
        },
        {
            "title": "The Avengers",
            "desc": "Captain Marvel is far from the earth",
            "footer": "A super hero team",
            "id": "2",
        }]),
    )
    .await;

    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]),
                @r###"
            [
              {
                "id": "2"
              },
              {
                "id": "1"
              }
            ]
            "###
            );
        })
        .await;
}

#[actix_rt::test]
async fn exactness_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
        {
            "title": "Captain Marvel",
            "desc": "Captain Marivel",
            "id": "1",
        },
        {
            "title": "Captain Marvel",
            "desc": "CaptainMarvel",
            "id": "2",
        }]),
    )
    .await;

    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]),
                @r###"
            [
              {
                "id": "2"
              },
              {
                "id": "1"
              }
            ]
            "###
            );
        })
        .await;
}
@@ -16,11 +16,17 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
    );
    map.insert("stop_words", json!([]));
    map.insert("non_separator_tokens", json!([]));
    map.insert("separator_tokens", json!([]));
    map.insert("dictionary", json!([]));
    map.insert("synonyms", json!({}));
    map.insert(
        "faceting",
        json!({
            "maxValuesPerFacet": json!(100),
            "sortFacetValuesBy": {
                "*": "alpha"
            }
        }),
    );
    map.insert(
@@ -48,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
    assert_eq!(settings.keys().len(), 11);
    assert_eq!(settings.keys().len(), 14);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -59,10 +65,16 @@ async fn get_settings() {
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
    );
    assert_eq!(settings["stopWords"], json!([]));
    assert_eq!(settings["nonSeparatorTokens"], json!([]));
    assert_eq!(settings["separatorTokens"], json!([]));
    assert_eq!(settings["dictionary"], json!([]));
    assert_eq!(
        settings["faceting"],
        json!({
            "maxValuesPerFacet": 100,
            "sortFacetValuesBy": {
                "*": "alpha"
            }
        })
    );
    assert_eq!(

@@ -1,3 +1,4 @@
mod distinct;
mod errors;
mod get_settings;
mod tokenizer_customization;

meilisearch/tests/settings/tokenizer_customization.rs (new file, 467 lines)
@@ -0,0 +1,467 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;

use crate::common::Server;

#[actix_rt::test]
async fn set_and_reset() {
    let server = Server::new().await;
    let index = server.index("test");

    let (_response, _code) = index
        .update_settings(json!({
            "nonSeparatorTokens": ["#", "&"],
            "separatorTokens": ["&sep", "<br/>"],
            "dictionary": ["J.R.R.", "J. R. R."],
        }))
        .await;
    index.wait_task(0).await;

    let (response, _) = index.settings().await;
    snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
    [
      "#",
      "&"
    ]
    "###);
    snapshot!(json_string!(response["separatorTokens"]), @r###"
    [
      "&sep",
      "<br/>"
    ]
    "###);
    snapshot!(json_string!(response["dictionary"]), @r###"
    [
      "J. R. R.",
      "J.R.R."
    ]
    "###);

    index
        .update_settings(json!({
            "nonSeparatorTokens": null,
            "separatorTokens": null,
            "dictionary": null,
        }))
        .await;

    index.wait_task(1).await;

    let (response, _) = index.settings().await;
    snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
    snapshot!(json_string!(response["separatorTokens"]), @"[]");
    snapshot!(json_string!(response["dictionary"]), @"[]");
}

#[actix_rt::test]
async fn set_and_search() {
    let documents = json!([
        {
            "id": 1,
            "content": "Mac & cheese",
        },
        {
            "id": 2,
            "content": "G#D#G#D#G#C#D#G#C#",
        },
        {
            "id": 3,
            "content": "Mac&sep&&sepcheese",
        },
    ]);

    let server = Server::new().await;
    let index = server.index("test");

    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    let (_response, _code) = index
        .update_settings(json!({
            "nonSeparatorTokens": ["#", "&"],
            "separatorTokens": ["<br/>", "&sep"],
            "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
        }))
        .await;
    index.wait_task(1).await;

    index
        .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "content": "Mac & cheese",
                "_formatted": {
                  "id": "1",
                  "content": "Mac <em>&</em> cheese"
                }
              },
              {
                "id": 3,
                "content": "Mac&sep&&sepcheese",
                "_formatted": {
                  "id": "3",
                  "content": "Mac&sep<em>&</em>&sepcheese"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(
            json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 1,
                    "content": "Mac & cheese",
                    "_formatted": {
                      "id": "1",
                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
                    }
                  },
                  {
                    "id": 3,
                    "content": "Mac&sep&&sepcheese",
                    "_formatted": {
                      "id": "3",
                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
                    }
                  }
                ]
                "###);
            },
        )
        .await;

    index
        .search(
            json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 1,
                    "content": "Mac & cheese",
                    "_formatted": {
                      "id": "1",
                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
                    }
                  },
                  {
                    "id": 3,
                    "content": "Mac&sep&&sepcheese",
                    "_formatted": {
                      "id": "3",
                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
                    }
                  }
                ]
                "###);
            },
        )
        .await;

    index
        .search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 2,
                "content": "G#D#G#D#G#C#D#G#C#",
                "_formatted": {
                  "id": "2",
                  "content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @"[]");
        })
        .await;
}

#[actix_rt::test]
async fn advanced_synergies() {
    let documents = json!([
        {
            "id": 1,
            "content": "J.R.R. Tolkien",
        },
        {
            "id": 2,
            "content": "J. R. R. Tolkien",
        },
        {
            "id": 3,
            "content": "jrr Tolkien",
        },
        {
            "id": 4,
            "content": "J.K. Rowlings",
        },
        {
            "id": 5,
            "content": "J. K. Rowlings",
        },
        {
            "id": 6,
            "content": "jk Rowlings",
        },
    ]);

    let server = Server::new().await;
    let index = server.index("test");

    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    let (_response, _code) = index
        .update_settings(json!({
            "dictionary": ["J.R.R.", "J. R. R."],
            "synonyms": {
                "J.R.R.": ["jrr", "J. R. R."],
                "J. R. R.": ["jrr", "J.R.R."],
                "jrr": ["J.R.R.", "J. R. R."],
                "J.K.": ["jk", "J. K."],
                "J. K.": ["jk", "J.K."],
                "jk": ["J.K.", "J. K."],
            }
        }))
        .await;
    index.wait_task(1).await;

    index
        .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              },
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              },
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              },
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;

    // Only update dictionary, the synonyms should be recomputed.
    let (_response, _code) = index
        .update_settings(json!({
            "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
        }))
        .await;
    index.wait_task(2).await;

    index
        .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              },
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              },
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              }
            ]
            "###);
        })
        .await;

    index
        .search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              },
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R.</em> R. Tolkien"
                }
              }
            ]
            "###);
        })
        .await;
}
@@ -15,8 +15,9 @@ license.workspace = true
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.7.2", default-features = false }
charabia = { version = "0.8.1", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = "0.5.0"
@@ -32,18 +33,22 @@ heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-f
    "lmdb",
    "sync-read-txn",
] }
hnsw = { version = "0.11.0", features = ["serde1"] }
indexmap = { version = "1.9.3", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.5.10"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.6.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.7.0"
roaring = "0.10.1"
rstar = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
space = "0.17.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.10.0"
smartstring = "1.0.1"
@@ -62,6 +67,9 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.10.5"

# profiling
puffin = "0.16.0"

# logging
log = "0.4.17"
logging_timer = "1.1.0"

@@ -52,7 +52,9 @@ fn main() -> Result<(), Box<dyn Error>> {
        let docs = execute_search(
            &mut ctx,
            &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
            &None,
            TermsMatchingStrategy::Last,
            milli::score_details::ScoringStrategy::Skip,
            false,
            &None,
            &None,

milli/src/distance.rs (new file, 25 lines)
@@ -0,0 +1,25 @@
use serde::{Deserialize, Serialize};
use space::Metric;

#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
pub struct DotProduct;

impl Metric<Vec<f32>> for DotProduct {
    type Unit = u32;

    // Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
    //
    // Here is a playground that validates the ordering of the bit representation of floats in range 0.0..=1.0:
    // <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
    fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
        let dist = 1.0 - dot_product_similarity(a, b);
        debug_assert!(!dist.is_nan());
        dist.to_bits()
    }
}

/// Returns the dot product similarity score, which will be between 0.0 and 1.0
/// if both vectors are normalized. The higher, the more similar the vectors are.
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(a, b)| a * b).sum()
}
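Why `to_bits` works as the metric unit here: for non-negative finite IEEE-754 floats, numeric order and bit-pattern order coincide, so comparing the `u32` bit patterns compares the distances. A minimal self-contained sketch, assuming the vectors are already L2-normalized and the similarity stays in `0.0..=1.0` (the example values are made up):

fn dot(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

fn main() {
    let query = [1.0_f32, 0.0];
    let close = [0.8_f32, 0.6]; // cosine similarity 0.8
    let far = [0.0_f32, 1.0];   // cosine similarity 0.0

    // distance = 1.0 - similarity, then reinterpret the bits as u32.
    // Both distances are non-negative, so the u32 comparison matches the
    // comparison of the float distances themselves.
    let d_close = (1.0 - dot(&query, &close)).to_bits();
    let d_far = (1.0 - dot(&query, &far)).to_bits();
    assert!(d_close < d_far);
}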
@@ -110,9 +110,13 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    },
    #[error(transparent)]
    InvalidGeoField(#[from] GeoError),
    #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
    InvalidVectorDimensions { expected: usize, found: usize },
    #[error("The `_vectors` field in the document with the id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
    InvalidVectorsType { document_id: Value, value: Value },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Attribute `{}` is not sortable. {}",
        .field,
@@ -124,6 +128,26 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
    #[error("Attribute `{}` is not facet-searchable. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
            false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
            ),
        }
    )]
    InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
    #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
        .field,
        .valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
        .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
    )]
    InvalidSearchableAttribute {
        field: String,
        valid_fields: BTreeSet<String>,
        hidden_fields: bool,
    },
    #[error("{}", HeedError::BadOpenOptions)]
    InvalidLmdbOpenOptions,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]

milli/src/heed_codec/fst_set_codec.rs (new file, 23 lines)
@@ -0,0 +1,23 @@
use std::borrow::Cow;

use fst::Set;
use heed::{BytesDecode, BytesEncode};

/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;

impl<'a> BytesEncode<'a> for FstSetCodec {
    type EItem = Set<Vec<u8>>;

    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        Some(Cow::Borrowed(item.as_fst().as_bytes()))
    }
}

impl<'a> BytesDecode<'a> for FstSetCodec {
    type DItem = Set<&'a [u8]>;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        Set::new(bytes).ok()
    }
}
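A hedged round-trip sketch of the codec above: encoding borrows the raw FST bytes and decoding re-validates them into a borrowed `Set<&[u8]>`. It assumes the `fst`/`heed` versions pinned by the crate and that the codec is reachable as `milli::heed_codec::FstSetCodec` (it is re-exported in the `mod.rs` hunk that follows):

use fst::Set;
use heed::{BytesDecode, BytesEncode};
use milli::heed_codec::FstSetCodec;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `from_iter` requires lexicographically sorted keys.
    let set: Set<Vec<u8>> = Set::from_iter(vec!["blue", "green", "red"])?;

    // Zero-copy encode to the FST's backing bytes, then decode them back.
    let bytes = FstSetCodec::bytes_encode(&set).ok_or("encode failed")?;
    let decoded = FstSetCodec::bytes_decode(&bytes).ok_or("decode failed")?;
    assert!(decoded.contains("green"));
    Ok(())
}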
@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;

pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
@@ -23,3 +25,9 @@ pub use self::roaring_bitmap_length::{
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

pub trait BytesDecodeOwned {
    type DItem;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
}

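The point of the new trait: unlike heed's borrowed `BytesDecode`, `bytes_decode_owned` returns a value with no lifetime tied to the input buffer, so callers can decode from short-lived buffers (a channel message, a scratch read) and drop them immediately. A toy implementor as a sketch; the `U32BeCodec` name is hypothetical:

struct U32BeCodec;

impl BytesDecodeOwned for U32BeCodec {
    type DItem = u32;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        // The result owns its data, so `bytes` may be freed right after this call.
        bytes.try_into().ok().map(u32::from_be_bytes)
    }
}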
@@ -2,8 +2,11 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;

use heed::BytesDecode;
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

pub struct BoRoaringBitmapCodec;

impl BoRoaringBitmapCodec {
@@ -13,7 +16,7 @@ impl BoRoaringBitmapCodec {
    }
}

impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
impl BytesDecode<'_> for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -28,6 +31,14 @@ impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
    }
}

impl BytesDecodeOwned for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        Self::bytes_decode(bytes)
    }
}

impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
    type EItem = RoaringBitmap;

@@ -5,6 +5,8 @@ use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

/// This is the limit where using a byteorder became less size efficient
/// than using a direct roaring encoding, it is also the point where we are able
/// to determine the encoding used only by using the array of bytes length.
@@ -103,6 +105,14 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
    }
}

impl BytesDecodeOwned for CboRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        Self::deserialize_from(bytes).ok()
    }
}

impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
    type EItem = RoaringBitmap;

@@ -2,6 +2,8 @@ use std::borrow::Cow;

use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

pub struct RoaringBitmapCodec;

impl heed::BytesDecode<'_> for RoaringBitmapCodec {
@@ -12,6 +14,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
    }
}

impl BytesDecodeOwned for RoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        RoaringBitmap::deserialize_from(bytes).ok()
    }
}

impl heed::BytesEncode<'_> for RoaringBitmapCodec {
    type EItem = RoaringBitmap;

@@ -1,11 +1,23 @@
use std::mem;

use heed::BytesDecode;

use crate::heed_codec::BytesDecodeOwned;

pub struct BoRoaringBitmapLenCodec;

impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec {
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
        Some((bytes.len() / mem::size_of::<u32>()) as u64)
    }
}

impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        Self::bytes_decode(bytes)
    }
}

@@ -1,11 +1,14 @@
use std::mem;

use heed::BytesDecode;

use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
use crate::heed_codec::BytesDecodeOwned;

pub struct CboRoaringBitmapLenCodec;

impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -20,3 +23,11 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
        }
    }
}

impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        Self::bytes_decode(bytes)
    }
}

@@ -3,6 +3,8 @@ use std::mem;

use byteorder::{LittleEndian, ReadBytesExt};

use crate::heed_codec::BytesDecodeOwned;

const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
const SERIAL_COOKIE: u16 = 12347;

@@ -59,6 +61,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
    }
}

impl BytesDecodeOwned for RoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
        RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
    }
}

#[cfg(test)]
mod tests {
    use heed::BytesEncode;

@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::mem::size_of;
use std::path::Path;
@@ -8,10 +8,12 @@ use charabia::{Language, Script};
use heed::flags::Flags;
use heed::types::*;
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
use rand_pcg::Pcg32;
use roaring::RoaringBitmap;
use rstar::RTree;
use time::OffsetDateTime;

use crate::distance::DotProduct;
use crate::error::{InternalError, UserError};
use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
@@ -19,13 +21,18 @@ use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
    FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::readable_slices::ReadableSlices;
use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
    OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
    BEU32,
};

/// The HNSW data-structure that we serialize, fill and search in.
pub type Hnsw = hnsw::Hnsw<DotProduct, Vec<f32>, Pcg32, 12, 24>;

pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;

@@ -42,6 +49,10 @@ pub mod main_key {
    pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
    pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
    pub const GEO_RTREE_KEY: &str = "geo-rtree";
    /// The prefix of the key that is used to store the, potentially big, HNSW structure.
    /// It is concatenated with a big-endian encoded number (non-human readable).
    /// e.g. vector-hnsw0x0032.
    pub const VECTOR_HNSW_KEY_PREFIX: &str = "vector-hnsw";
    pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
    pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
    pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -49,8 +60,12 @@ pub mod main_key {
    pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
    pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
    pub const STOP_WORDS_KEY: &str = "stop-words";
    pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
    pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
    pub const DICTIONARY_KEY: &str = "dictionary";
    pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
    pub const SYNONYMS_KEY: &str = "synonyms";
    pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
    pub const WORDS_FST_KEY: &str = "words-fst";
    pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
    pub const CREATED_AT_KEY: &str = "created-at";
@@ -61,6 +76,7 @@ pub mod main_key {
    pub const EXACT_WORDS: &str = "exact-words";
    pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
    pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
    pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
    pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
}

@@ -84,8 +100,10 @@ pub mod db_name {
    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
    pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
    pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
    pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
    pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
    pub const VECTOR_ID_DOCID: &str = "vector-id-docids";
    pub const DOCUMENTS: &str = "documents";
    pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
}
@@ -143,12 +161,17 @@ pub struct Index {
    pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
    /// Maps the facet field id and ranges of strings with the docids that correspond to them.
    pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
    /// Maps the facet field id of the string facets with an FST containing all the facets values.
    pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,

    /// Maps the document id, the facet field id and the numbers.
    pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
    /// Maps the document id, the facet field id and the strings.
    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,

    /// Maps a vector id to the document id that has it.
    pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>,

    /// Maps the document id to the document as an obkv store.
    pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
}
@@ -162,7 +185,7 @@ impl Index {
    ) -> Result<Index> {
        use db_name::*;

        options.max_dbs(23);
        options.max_dbs(24);
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };

        let env = options.open(path)?;
@@ -192,17 +215,18 @@ impl Index {
        let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
        let facet_id_string_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
        let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
        let facet_id_exists_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
        let facet_id_is_null_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
        let facet_id_is_empty_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;

        let field_id_docid_facet_f64s =
            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
        let field_id_docid_facet_strings =
            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
        let vector_id_docid = env.create_database(&mut wtxn, Some(VECTOR_ID_DOCID))?;
        let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
        wtxn.commit()?;

@@ -226,11 +250,13 @@ impl Index {
            field_id_word_count_docids,
            facet_id_f64_docids,
            facet_id_string_docids,
            facet_id_string_fst,
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
            field_id_docid_facet_f64s,
            field_id_docid_facet_strings,
            vector_id_docid,
            documents,
        })
    }
@@ -502,6 +528,56 @@ impl Index {
        }
    }

    /* vector HNSW */

    /// Writes the provided `hnsw`.
    pub(crate) fn put_vector_hnsw(&self, wtxn: &mut RwTxn, hnsw: &Hnsw) -> heed::Result<()> {
        // We must delete all the chunks before we write the new HNSW chunks.
        self.delete_vector_hnsw(wtxn)?;

        let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB
        let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?;
        for (i, chunk) in bytes.chunks(chunk_size).enumerate() {
            let i = i as u32;
            let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
            key.extend_from_slice(&i.to_be_bytes());
            self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?;
        }
        Ok(())
    }

    /// Delete the `hnsw`.
    pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>(
            wtxn,
            main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(),
        )?;
        let mut deleted = false;
        while iter.next().transpose()?.is_some() {
            // We do not keep a reference to the key or the value.
            unsafe { deleted |= iter.del_current()? };
        }
        Ok(deleted)
    }

    /// Returns the `hnsw`.
    pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
        let mut slices = Vec::new();
        for result in
            self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
        {
            let (_, slice) = result?;
            slices.push(slice);
        }

        if slices.is_empty() {
            Ok(None)
        } else {
            let readable_slices: ReadableSlices<_> = slices.into_iter().collect();
            Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?))
        }
    }

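The serialized HNSW is split into 1.5 GiB chunks because a single LMDB value has a size cap; each chunk lives under the `vector-hnsw` prefix plus a big-endian chunk index, which is exactly why a prefix iteration returns the chunks back in order. A small self-contained sketch of that key layout (the constants mirror the code above):

fn hnsw_chunk_keys(payload_len: usize) -> Vec<Vec<u8>> {
    const PREFIX: &[u8] = b"vector-hnsw";
    const CHUNK_SIZE: usize = 1024 * 1024 * (1024 + 512); // 1.5 GiB per LMDB value

    let chunks = (payload_len + CHUNK_SIZE - 1) / CHUNK_SIZE;
    (0..chunks as u32)
        .map(|i| {
            // "vector-hnsw" ++ big-endian chunk number; big-endian keeps the
            // lexicographic key order equal to the numeric chunk order.
            let mut key = PREFIX.to_vec();
            key.extend_from_slice(&i.to_be_bytes());
            key
        })
        .collect()
}

fn main() {
    // A 4 GiB payload needs three chunk keys.
    assert_eq!(hnsw_chunk_keys(4 * 1024 * 1024 * 1024).len(), 3);
}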
    /* field distribution */

    /// Writes the field distribution which associates every field name with
@@ -976,18 +1052,116 @@ impl Index {
        }
    }

    /* non separator tokens */

    pub(crate) fn put_non_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
    }

    pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
    }

    pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
            rtxn,
            main_key::NON_SEPARATOR_TOKENS_KEY,
        )?)
    }

    /* separator tokens */

    pub(crate) fn put_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
    }

    pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
    }

    pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        Ok(self
            .main
            .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
    }

    /* separators easing method */

    pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        let default_separators =
            charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
        let mut separators: Option<BTreeSet<_>> = None;
        if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
            separator_tokens.extend(default_separators.clone());
            separators = Some(separator_tokens);
        }

        if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
            separators = separators
                .or_else(|| Some(default_separators.collect()))
                .map(|separators| &separators - &non_separator_tokens);
        }

        Ok(separators)
    }

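The resolution above is plain set arithmetic: (defaults ∪ user separators) − non-separators, with `None` meaning "no customization at all". A standalone sketch of the same logic under that reading (`resolve_separators` and its arguments are hypothetical names; `defaults` stands in for `charabia::separators::DEFAULT_SEPARATORS`):

use std::collections::BTreeSet;

fn resolve_separators(
    defaults: &[&str],
    user_separators: Option<BTreeSet<String>>,
    non_separators: Option<BTreeSet<String>>,
) -> Option<BTreeSet<String>> {
    let defaults: BTreeSet<String> = defaults.iter().map(|s| s.to_string()).collect();

    // Extra separators are additive on top of the defaults.
    let mut separators = user_separators.map(|mut set| {
        set.extend(defaults.iter().cloned());
        set
    });
    // Non-separators are subtractive; `&a - &b` on BTreeSet is set difference.
    if let Some(non) = non_separators {
        separators = separators.or(Some(defaults)).map(|s| &s - &non);
    }
    separators
}

fn main() {
    let seps = resolve_separators(&[" ", ","], None, Some(BTreeSet::from(["#".to_string()])));
    assert_eq!(seps, Some(BTreeSet::from([" ".to_string(), ",".to_string()])));
}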
/* dictionary */
|
||||
|
||||
pub(crate) fn put_dictionary(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
|
||||
}
|
||||
|
||||
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
|
||||
}
|
||||
|
||||
/* synonyms */
|
||||
|
||||
pub(crate) fn put_synonyms(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(
|
||||
wtxn,
|
||||
main_key::USER_DEFINED_SYNONYMS_KEY,
|
||||
user_defined_synonyms,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
|
||||
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
|
||||
}
|
||||
|
||||
pub fn user_defined_synonyms(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> heed::Result<BTreeMap<String, Vec<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||
@@ -1228,6 +1402,31 @@ impl Index {
        self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET)
    }

    pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
        let mut orders = self
            .main
            .get::<_, Str, SerdeJson<HashMap<String, OrderBy>>>(
                txn,
                main_key::SORT_FACET_VALUES_BY,
            )?
            .unwrap_or_default();
        // Insert the default ordering if it is not already overwritten by the user.
        orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
        Ok(orders)
    }
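The `"*"` entry acts as the global fallback: it is patched in on read only when the user has not stored their own. A quick sketch of that entry-API behavior (the local `OrderBy` below is a hypothetical stand-in for the one defined in the facet module):

```rust
use std::collections::HashMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OrderBy { Lexicographic, Count }

fn main() {
    // What the user stored (only an override for "colour").
    let mut orders: HashMap<String, OrderBy> =
        HashMap::from([("colour".into(), OrderBy::Count)]);
    // The getter patches in the global default under "*" only if it is absent.
    orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
    assert_eq!(orders["colour"], OrderBy::Count);
    assert_eq!(orders["*"], OrderBy::Lexicographic);
}
```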

    pub(crate) fn put_sort_facet_values_by(
        &self,
        txn: &mut RwTxn,
        val: &HashMap<String, OrderBy>,
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val)
    }

    pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY)
    }

    pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
        self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
    }
@@ -2488,8 +2687,12 @@ pub(crate) mod tests {

        let rtxn = index.read_txn().unwrap();
        let search = Search::new(&rtxn, &index);
-       let SearchResult { matching_words: _, candidates: _, mut documents_ids } =
-           search.execute().unwrap();
+       let SearchResult {
+           matching_words: _,
+           candidates: _,
+           document_scores: _,
+           mut documents_ids,
+       } = search.execute().unwrap();
        let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
        documents_ids.sort_unstable();
        let docs = index.documents(&rtxn, documents_ids).unwrap();
@@ -10,6 +10,7 @@ pub mod documents;

mod asc_desc;
mod criterion;
pub mod distance;
mod error;
mod external_documents_ids;
pub mod facet;
@@ -17,6 +18,8 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
mod readable_slices;
pub mod score_details;
mod search;
pub mod update;

@@ -29,6 +32,7 @@ use std::convert::{TryFrom, TryInto};
use std::hash::BuildHasherDefault;

use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
pub use distance::dot_product_similarity;
pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
@@ -53,8 +57,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
-   FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
-   SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+   FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
+   MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
+   DEFAULT_VALUES_PER_FACET,
};

pub type Result<T> = std::result::Result<T, error::Error>;
@@ -283,6 +288,35 @@ pub fn normalize_facet(original: &str) -> String {
    CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
}

/// Represents either a vector or an array of multiple vectors.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
    #[serde(with = "either::serde_untagged")]
    inner: either::Either<Vec<f32>, Vec<Vec<f32>>>,
}

impl VectorOrArrayOfVectors {
    pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
        match self.inner {
            either::Either::Left(vector) => vec![vector],
            either::Either::Right(vectors) => vectors,
        }
    }
}
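A small sketch of what the transparent, untagged representation buys at the JSON boundary; this assumes the `either` crate's `serde` feature and `serde_json` as a dev-dependency, and both JSON shapes land in the same struct defined above:

```rust
fn main() {
    // A bare vector deserializes as the Left variant...
    let single: VectorOrArrayOfVectors = serde_json::from_str("[1.0, 2.0]").unwrap();
    assert_eq!(single.into_array_of_vectors(), vec![vec![1.0, 2.0]]);

    // ...and an array of vectors as the Right variant; both normalize
    // to `Vec<Vec<f32>>` for the rest of the pipeline.
    let many: VectorOrArrayOfVectors = serde_json::from_str("[[1.0], [2.0]]").unwrap();
    assert_eq!(many.into_array_of_vectors(), vec![vec![1.0], vec![2.0]]);
}
```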

/// Normalize a vector by dividing each of its dimensions by the vector's length.
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
    let squared: f32 = vector.iter().map(|x| x * x).sum();
    let length = squared.sqrt();
    if length <= f32::EPSILON {
        vector
    } else {
        vector.iter_mut().for_each(|x| *x /= length);
        vector
    }
}
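A quick worked check of the function above: `[3, 4]` has length 5 and becomes the unit vector `[0.6, 0.8]`, while a (near-)zero vector is returned unchanged to avoid dividing by roughly zero:

```rust
fn main() {
    // sqrt(3^2 + 4^2) = 5, so each component is divided by 5.
    assert_eq!(normalize_vector(vec![3.0, 4.0]), vec![0.6, 0.8]);
    // The epsilon guard leaves a zero vector as-is.
    assert_eq!(normalize_vector(vec![0.0, 0.0]), vec![0.0, 0.0]);
}
```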

#[cfg(test)]
mod tests {
    use serde_json::json;

milli/src/readable_slices.rs (new file, 85 lines)
@@ -0,0 +1,85 @@
use std::io::{self, Read};
use std::iter::FromIterator;

pub struct ReadableSlices<A> {
    inner: Vec<A>,
    pos: u64,
}

impl<A> FromIterator<A> for ReadableSlices<A> {
    fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
        ReadableSlices { inner: iter.into_iter().collect(), pos: 0 }
    }
}

impl<A: AsRef<[u8]>> Read for ReadableSlices<A> {
    fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
        let original_buf_len = buf.len();

        // We explore the list of slices to find the one where we must start reading.
        let mut pos = self.pos;
        let index = match self
            .inner
            .iter()
            .map(|s| s.as_ref().len() as u64)
            .position(|size| pos.checked_sub(size).map(|p| pos = p).is_none())
        {
            Some(index) => index,
            None => return Ok(0),
        };

        let mut inner_pos = pos as usize;
        for slice in &self.inner[index..] {
            let slice = &slice.as_ref()[inner_pos..];

            if buf.len() > slice.len() {
                // We must exhaust the current slice and go to the next one; there is not enough here.
                buf[..slice.len()].copy_from_slice(slice);
                buf = &mut buf[slice.len()..];
                inner_pos = 0;
            } else {
                // There is enough in this slice to fill the remaining bytes of the buffer.
                // Let's break just after filling it.
                buf.copy_from_slice(&slice[..buf.len()]);
                buf = &mut [];
                break;
            }
        }

        let written = original_buf_len - buf.len();
        self.pos += written as u64;
        Ok(written)
    }
}
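The `position` call above doubles as a subtraction loop: it walks the slice sizes, decrementing a local copy of the cursor until the cursor no longer fits inside a slice, which leaves `pos` holding the offset inside the found slice. A standalone sketch of that trick (hypothetical helper, not part of the diff):

```rust
// Given slice sizes and a global cursor, return (slice index, offset inside it).
fn locate(sizes: &[u64], mut pos: u64) -> Option<(usize, u64)> {
    let index = sizes
        .iter()
        .position(|&size| pos.checked_sub(size).map(|p| pos = p).is_none())?;
    Some((index, pos))
}

fn main() {
    // Slices of sizes 3, 3, 3: global position 7 lands in slice 2 at offset 1.
    assert_eq!(locate(&[3, 3, 3], 7), Some((2, 1)));
    // A cursor at or past the end finds no slice; `read` reports that as EOF (Ok(0)).
    assert_eq!(locate(&[3, 3, 3], 9), None);
}
```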

#[cfg(test)]
mod test {
    use std::io::Read;

    use super::ReadableSlices;

    #[test]
    fn basic() {
        let data: Vec<_> = (0..100).collect();
        let splits: Vec<_> = data.chunks(3).collect();
        let mut rdslices: ReadableSlices<_> = splits.into_iter().collect();

        let mut output = Vec::new();
        let length = rdslices.read_to_end(&mut output).unwrap();
        assert_eq!(length, data.len());
        assert_eq!(output, data);
    }

    #[test]
    fn small_reads() {
        let data: Vec<_> = (0..u8::MAX).collect();
        let splits: Vec<_> = data.chunks(27).collect();
        let mut rdslices: ReadableSlices<_> = splits.into_iter().collect();

        let buffer = &mut [0; 45];
        let length = rdslices.read(buffer).unwrap();
        let expected: Vec<_> = (0..buffer.len() as u8).collect();
        assert_eq!(length, buffer.len());
        assert_eq!(buffer, &expected[..]);
    }
}

milli/src/score_details.rs (new file, 313 lines)
@@ -0,0 +1,313 @@
use serde::Serialize;

use crate::distance_between_two_points;

#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
    Words(Words),
    Typo(Typo),
    Proximity(Rank),
    Fid(Rank),
    Position(Rank),
    ExactAttribute(ExactAttribute),
    Exactness(Rank),
    Sort(Sort),
    GeoSort(GeoSort),
}

impl ScoreDetails {
    pub fn local_score(&self) -> Option<f64> {
        self.rank().map(Rank::local_score)
    }

    pub fn rank(&self) -> Option<Rank> {
        match self {
            ScoreDetails::Words(details) => Some(details.rank()),
            ScoreDetails::Typo(details) => Some(details.rank()),
            ScoreDetails::Proximity(details) => Some(*details),
            ScoreDetails::Fid(details) => Some(*details),
            ScoreDetails::Position(details) => Some(*details),
            ScoreDetails::ExactAttribute(details) => Some(details.rank()),
            ScoreDetails::Exactness(details) => Some(*details),
            ScoreDetails::Sort(_) => None,
            ScoreDetails::GeoSort(_) => None,
        }
    }

    pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
        Rank::global_score(details.filter_map(Self::rank))
    }

    /// Panics
    ///
    /// - If Position is not preceded by Fid
    /// - If Exactness is not preceded by ExactAttribute
    pub fn to_json_map<'a>(
        details: impl Iterator<Item = &'a Self>,
    ) -> serde_json::Map<String, serde_json::Value> {
        let mut order = 0;
        let mut fid_details = None;
        let mut details_map = serde_json::Map::default();
        for details in details {
            match details {
                ScoreDetails::Words(words) => {
                    let words_details = serde_json::json!({
                        "order": order,
                        "matchingWords": words.matching_words,
                        "maxMatchingWords": words.max_matching_words,
                        "score": words.rank().local_score(),
                    });
                    details_map.insert("words".into(), words_details);
                    order += 1;
                }
                ScoreDetails::Typo(typo) => {
                    let typo_details = serde_json::json!({
                        "order": order,
                        "typoCount": typo.typo_count,
                        "maxTypoCount": typo.max_typo_count,
                        "score": typo.rank().local_score(),
                    });
                    details_map.insert("typo".into(), typo_details);
                    order += 1;
                }
                ScoreDetails::Proximity(proximity) => {
                    let proximity_details = serde_json::json!({
                        "order": order,
                        "score": proximity.local_score(),
                    });
                    details_map.insert("proximity".into(), proximity_details);
                    order += 1;
                }
                ScoreDetails::Fid(fid) => {
                    // copy the rank for future use in Position.
                    fid_details = Some(*fid);
                    // For now, fid is a virtual rule always followed by the "position" rule
                    let fid_details = serde_json::json!({
                        "order": order,
                        "attribute_ranking_order_score": fid.local_score(),
                    });
                    details_map.insert("attribute".into(), fid_details);
                    order += 1;
                }
                ScoreDetails::Position(position) => {
                    // For now, position is a virtual rule always preceded by the "fid" rule
                    let attribute_details = details_map
                        .get_mut("attribute")
                        .expect("position not preceded by attribute");
                    let attribute_details = attribute_details
                        .as_object_mut()
                        .expect("attribute details was not an object");
                    let Some(fid_details) = fid_details else {
                        unimplemented!("position not preceded by attribute");
                    };

                    attribute_details
                        .insert("query_word_distance_score".into(), position.local_score().into());
                    let score = Rank::global_score([fid_details, *position].iter().copied());
                    attribute_details.insert("score".into(), score.into());

                    // do not update the order since this was already done by fid
                }
                ScoreDetails::ExactAttribute(exact_attribute) => {
                    let exactness_details = serde_json::json!({
                        "order": order,
                        "matchType": exact_attribute,
                        "score": exact_attribute.rank().local_score(),
                    });
                    details_map.insert("exactness".into(), exactness_details);
                    order += 1;
                }
                ScoreDetails::Exactness(details) => {
                    // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
                    let exactness_details = details_map
                        .get_mut("exactness")
                        .expect("Exactness not preceded by exactAttribute");
                    let exactness_details = exactness_details
                        .as_object_mut()
                        .expect("exactness details was not an object");
                    if exactness_details.get("matchType").expect("missing 'matchType'")
                        == &serde_json::json!(ExactAttribute::NoExactMatch)
                    {
                        let score = Rank::global_score(
                            [ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
                        );
                        *exactness_details.get_mut("score").expect("missing score") = score.into();
                    }
                    // do not update the order since this was already done by exactAttribute
                }
                ScoreDetails::Sort(details) => {
                    let sort = if details.redacted {
                        format!("<hidden-rule-{order}>")
                    } else {
                        format!(
                            "{}:{}",
                            details.field_name,
                            if details.ascending { "asc" } else { "desc" }
                        )
                    };
                    let value =
                        if details.redacted { "<hidden>".into() } else { details.value.clone() };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": value,
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
                ScoreDetails::GeoSort(details) => {
                    let sort = format!(
                        "_geoPoint({}, {}):{}",
                        details.target_point[0],
                        details.target_point[1],
                        if details.ascending { "asc" } else { "desc" }
                    );
                    let point = if let Some(value) = details.value {
                        serde_json::json!({ "lat": value[0], "lng": value[1]})
                    } else {
                        serde_json::Value::Null
                    };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": point,
                        "distance": details.distance(),
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
            }
        }
        details_map
    }
}

/// The strategy to compute scores.
///
/// It makes sense to pass down this strategy to the internals of the search, because
/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
/// are not correct to do when computing the scores.
///
/// This strategy could feasibly be extended to differentiate between the normalized score and the
/// detailed scores, but it is not useful today as the normalized score is *derived from* the
/// detailed scores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ScoringStrategy {
    /// Don't compute scores
    #[default]
    Skip,
    /// Compute detailed scores
    Detailed,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Words {
    pub matching_words: u32,
    pub max_matching_words: u32,
}

impl Words {
    pub fn rank(&self) -> Rank {
        Rank { rank: self.matching_words, max_rank: self.max_matching_words }
    }

    pub(crate) fn from_rank(rank: Rank) -> Words {
        Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
    pub typo_count: u32,
    pub max_typo_count: u32,
}

impl Typo {
    pub fn rank(&self) -> Rank {
        Rank {
            rank: self.max_typo_count - self.typo_count + 1,
            max_rank: (self.max_typo_count + 1),
        }
    }

    // max_rank = max_typo + 1
    // max_typo = max_rank - 1
    //
    // rank = max_typo - typo + 1
    // rank = max_rank - 1 - typo + 1
    // rank + typo = max_rank
    // typo = max_rank - rank
    pub fn from_rank(rank: Rank) -> Typo {
        Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
    }
}
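Following the inversion worked out in the comments above, `rank` and `from_rank` are inverses of each other. A quick sketch checking the round-trip with the types defined in this file:

```rust
fn main() {
    // 1 typo out of at most 2: rank = 2 - 1 + 1 = 2, max_rank = 2 + 1 = 3.
    let typo = Typo { typo_count: 1, max_typo_count: 2 };
    let rank = typo.rank();
    assert_eq!((rank.rank, rank.max_rank), (2, 3));
    // Inverting the rank recovers the original typo counts.
    assert_eq!(Typo::from_rank(rank), typo);
}
```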

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
    /// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
    ///
    /// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
    /// (they don't match the query).
    pub rank: u32,
    /// The maximum possible rank. Documents with this rank have a score of 1.
    ///
    /// The max rank should not be 0.
    pub max_rank: u32,
}

impl Rank {
    pub fn local_score(self) -> f64 {
        self.rank as f64 / self.max_rank as f64
    }

    pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
        let mut rank = Rank { rank: 1, max_rank: 1 };
        for inner_rank in details {
            rank.rank -= 1;

            rank.rank *= inner_rank.max_rank;
            rank.max_rank *= inner_rank.max_rank;

            rank.rank += inner_rank.rank;
        }
        rank.local_score()
    }
}
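`global_score` folds the per-rule ranks as digits of a mixed-radix number: each step scales the accumulated (zero-based) rank by the next rule's `max_rank` before adding that rule's rank, so earlier rules strictly dominate later ones. A worked sketch:

```rust
fn main() {
    // Two rules: the first scores 2/3, the second 1/2.
    let ranks = [Rank { rank: 2, max_rank: 3 }, Rank { rank: 1, max_rank: 2 }];
    // Fold: start (1, 1) -> (0*3 + 2, 3) = (2, 3) -> ((2-1)*2 + 1, 3*2) = (3, 6).
    let score = Rank::global_score(ranks.iter().copied());
    assert_eq!(score, 0.5); // 3 / 6
    // Because rule 2 only refines within a rule-1 bucket, any document beating
    // another on rule 1 ends up with a higher global score regardless of rule 2.
}
```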

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
    ExactMatch,
    MatchesStart,
    NoExactMatch,
}

impl ExactAttribute {
    pub fn rank(&self) -> Rank {
        let rank = match self {
            ExactAttribute::ExactMatch => 3,
            ExactAttribute::MatchesStart => 2,
            ExactAttribute::NoExactMatch => 1,
        };
        Rank { rank, max_rank: 3 }
    }
}

#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
    pub field_name: String,
    pub ascending: bool,
    pub redacted: bool,
    pub value: serde_json::Value,
}

#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct GeoSort {
    pub target_point: [f64; 2],
    pub ascending: bool,
    pub value: Option<[f64; 2]>,
}

impl GeoSort {
    pub fn distance(&self) -> Option<f64> {
        self.value.map(|value| distance_between_two_points(&self.target_point, &value))
    }
}
@@ -1,19 +1,22 @@
-use std::collections::{BTreeMap, HashSet};
+use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::ControlFlow;
use std::{fmt, mem};

use heed::types::ByteSlice;
use heed::BytesDecode;
use indexmap::IndexMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};

use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
-   FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
-   OrderedF64Codec,
+   FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
-use crate::search::facet::facet_distribution_iter;
+use crate::search::facet::facet_distribution_iter::{
+   count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
+};
use crate::{FieldId, Index, Result};

/// The default number of values by facets that will
@@ -24,10 +27,21 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100;
/// the system to choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 3000;

/// How should we fetch the facets?
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OrderBy {
    /// By lexicographic order...
    #[default]
    Lexicographic,
    /// Or by number of docids in common?
    Count,
}

pub struct FacetDistribution<'a> {
-   facets: Option<HashSet<String>>,
+   facets: Option<HashMap<String, OrderBy>>,
    candidates: Option<RoaringBitmap>,
    max_values_per_facet: usize,
    default_order_by: OrderBy,
    rtxn: &'a heed::RoTxn<'a>,
    index: &'a Index,
}
@@ -38,13 +52,22 @@ impl<'a> FacetDistribution<'a> {
            facets: None,
            candidates: None,
            max_values_per_facet: DEFAULT_VALUES_PER_FACET,
            default_order_by: OrderBy::default(),
            rtxn,
            index,
        }
    }

-   pub fn facets<I: IntoIterator<Item = A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
-       self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
+   pub fn facets<I: IntoIterator<Item = (A, OrderBy)>, A: AsRef<str>>(
+       &mut self,
+       names_ordered_by: I,
+   ) -> &mut Self {
+       self.facets = Some(
+           names_ordered_by
+               .into_iter()
+               .map(|(name, order_by)| (name.as_ref().to_string(), order_by))
+               .collect(),
+       );
        self
    }

@@ -53,6 +76,11 @@ impl<'a> FacetDistribution<'a> {
        self
    }

    pub fn default_order_by(&mut self, order_by: OrderBy) -> &mut Self {
        self.default_order_by = order_by;
        self
    }

    pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
        self.candidates = Some(candidates);
        self
@@ -65,7 +93,7 @@ impl<'a> FacetDistribution<'a> {
        field_id: FieldId,
        facet_type: FacetType,
        candidates: &RoaringBitmap,
-       distribution: &mut BTreeMap<String, u64>,
+       distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
        match facet_type {
            FacetType::Number => {
@@ -134,9 +162,15 @@ impl<'a> FacetDistribution<'a> {
        &self,
        field_id: FieldId,
        candidates: &RoaringBitmap,
-       distribution: &mut BTreeMap<String, u64>,
+       order_by: OrderBy,
+       distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
-       facet_distribution_iter::iterate_over_facet_distribution(
+       let search_function = match order_by {
+           OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
+           OrderBy::Count => count_iterate_over_facet_distribution,
+       };
+
+       search_function(
            self.rtxn,
            self.index
                .facet_id_f64_docids
@@ -159,9 +193,15 @@ impl<'a> FacetDistribution<'a> {
        &self,
        field_id: FieldId,
        candidates: &RoaringBitmap,
-       distribution: &mut BTreeMap<String, u64>,
+       order_by: OrderBy,
+       distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
-       facet_distribution_iter::iterate_over_facet_distribution(
+       let search_function = match order_by {
+           OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
+           OrderBy::Count => count_iterate_over_facet_distribution,
+       };
+
+       search_function(
            self.rtxn,
            self.index
                .facet_id_string_docids
@@ -189,93 +229,47 @@ impl<'a> FacetDistribution<'a> {
        )
    }

-   /// Placeholder search, a.k.a. no candidates were specified. We iterate throught the
-   /// facet values one by one and iterate on the facet level 0 for numbers.
-   fn facet_values_from_raw_facet_database(
+   fn facet_values(
        &self,
        field_id: FieldId,
-   ) -> heed::Result<BTreeMap<String, u64>> {
-       let mut distribution = BTreeMap::new();
-
-       let db = self.index.facet_id_f64_docids;
-       let mut prefix = vec![];
-       prefix.extend_from_slice(&field_id.to_be_bytes());
-       prefix.push(0); // read values from level 0 only
-
-       let iter = db
-           .as_polymorph()
-           .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
-           .remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
-
-       for result in iter {
-           let (key, value) = result?;
-           distribution.insert(key.left_bound.to_string(), value.bitmap.len());
-           if distribution.len() == self.max_values_per_facet {
-               break;
-           }
-       }
-
-       let iter = self
-           .index
-           .facet_id_string_docids
-           .as_polymorph()
-           .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
-           .remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
-
-       for result in iter {
-           let (key, value) = result?;
-
-           let docid = value.bitmap.iter().next().unwrap();
-           let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound);
-           let original_string =
-               self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned();
-
-           distribution.insert(original_string, value.bitmap.len());
-           if distribution.len() == self.max_values_per_facet {
-               break;
-           }
-       }
-
-       Ok(distribution)
-   }
-
-   fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
+       order_by: OrderBy,
+   ) -> heed::Result<IndexMap<String, u64>> {
        use FacetType::{Number, String};

-       match self.candidates {
-           Some(ref candidates) => {
+       let mut distribution = IndexMap::new();
+       match (order_by, &self.candidates) {
+           (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => {
                // Classic search, candidates were specified, we must return facet values only related
                // to those candidates. We also enter here for facet strings for performance reasons.
-               let mut distribution = BTreeMap::new();
-               if candidates.len() <= CANDIDATES_THRESHOLD {
-                   self.facet_distribution_from_documents(
-                       field_id,
-                       Number,
-                       candidates,
-                       &mut distribution,
-                   )?;
-                   self.facet_distribution_from_documents(
-                       field_id,
-                       String,
-                       candidates,
-                       &mut distribution,
-                   )?;
-               } else {
-                   self.facet_numbers_distribution_from_facet_levels(
-                       field_id,
-                       candidates,
-                       &mut distribution,
-                   )?;
-                   self.facet_strings_distribution_from_facet_levels(
-                       field_id,
-                       candidates,
-                       &mut distribution,
-                   )?;
-               }
-               Ok(distribution)
+               self.facet_distribution_from_documents(field_id, Number, cnd, &mut distribution)?;
+               self.facet_distribution_from_documents(field_id, String, cnd, &mut distribution)?;
            }
-           None => self.facet_values_from_raw_facet_database(field_id),
-       }
+           _ => {
+               let universe;
+               let candidates = match &self.candidates {
+                   Some(cnd) => cnd,
+                   None => {
+                       universe = self.index.documents_ids(self.rtxn)?;
+                       &universe
+                   }
+               };
+
+               self.facet_numbers_distribution_from_facet_levels(
+                   field_id,
+                   candidates,
+                   order_by,
+                   &mut distribution,
+               )?;
+               self.facet_strings_distribution_from_facet_levels(
+                   field_id,
+                   candidates,
+                   order_by,
+                   &mut distribution,
+               )?;
+           }
+       };
+
+       Ok(distribution)
    }

    pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
@@ -291,6 +285,7 @@ impl<'a> FacetDistribution<'a> {
            Some(facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
+                   .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
@@ -300,7 +295,7 @@ impl<'a> FacetDistribution<'a> {
                    }
                    .into());
                } else {
-                   facets.clone()
+                   facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
@@ -337,7 +332,7 @@ impl<'a> FacetDistribution<'a> {
        Ok(distribution)
    }

-   pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
+   pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_fields = self.index.filterable_fields(self.rtxn)?;

@@ -345,6 +340,7 @@ impl<'a> FacetDistribution<'a> {
            Some(ref facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
+                   .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
@@ -354,7 +350,7 @@ impl<'a> FacetDistribution<'a> {
                    }
                    .into());
                } else {
-                   facets.clone()
+                   facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
@@ -363,7 +359,12 @@ impl<'a> FacetDistribution<'a> {
        let mut distribution = BTreeMap::new();
        for (fid, name) in fields_ids_map.iter() {
            if crate::is_faceted(name, &fields) {
-               let values = self.facet_values(fid)?;
+               let order_by = self
+                   .facets
+                   .as_ref()
+                   .and_then(|facets| facets.get(name).copied())
+                   .unwrap_or(self.default_order_by);
+               let values = self.facet_values(fid, order_by)?;
                distribution.insert(name.to_string(), values);
            }
        }
@@ -374,25 +375,34 @@ impl<'a> FacetDistribution<'a> {

impl fmt::Debug for FacetDistribution<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-       let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } =
-           self;
+       let FacetDistribution {
+           facets,
+           candidates,
+           max_values_per_facet,
+           default_order_by,
+           rtxn: _,
+           index: _,
+       } = self;

        f.debug_struct("FacetDistribution")
            .field("facets", facets)
            .field("candidates", candidates)
            .field("max_values_per_facet", max_values_per_facet)
            .field("default_order_by", default_order_by)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use std::iter;

    use big_s::S;
    use maplit::hashset;

    use crate::documents::documents_batch_reader_from_objects;
    use crate::index::tests::TempIndex;
-   use crate::{milli_snap, FacetDistribution};
+   use crate::{milli_snap, FacetDistribution, OrderBy};

    #[test]
    fn few_candidates_few_facet_values() {
@@ -417,14 +427,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates([0, 1, 2].iter().copied().collect())
            .execute()
            .unwrap();
@@ -432,7 +442,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates([1, 2].iter().copied().collect())
            .execute()
            .unwrap();
@@ -443,7 +453,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates([2].iter().copied().collect())
            .execute()
            .unwrap();
@@ -451,13 +461,22 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates([0, 1, 2].iter().copied().collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###);

+       let map = FacetDistribution::new(&txn, &index)
+           .facets(iter::once(("colour", OrderBy::Count)))
+           .candidates([0, 1, 2].iter().copied().collect())
+           .max_values_per_facet(1)
+           .execute()
+           .unwrap();
+
+       milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
    }

    #[test]
@@ -489,14 +508,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .max_values_per_facet(1)
            .execute()
            .unwrap();
@@ -504,7 +523,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..10_000).collect())
            .execute()
            .unwrap();
@@ -512,7 +531,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();
@@ -520,7 +539,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();
@@ -528,13 +547,22 @@ mod tests {
        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###);

+       let map = FacetDistribution::new(&txn, &index)
+           .facets(iter::once(("colour", OrderBy::Count)))
+           .candidates((0..5_000).collect())
+           .max_values_per_facet(1)
+           .execute()
+           .unwrap();
+
+       milli_snap!(format!("{map:?}"), @r###"{"colour": {"Red": 3000}}"###);
    }

    #[test]
@@ -566,14 +594,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .max_values_per_facet(2)
            .execute()
            .unwrap();
@@ -581,7 +609,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..10_000).collect())
            .execute()
            .unwrap();
@@ -589,7 +617,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();
@@ -626,14 +654,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();
@@ -641,7 +669,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();
@@ -678,14 +706,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();
@@ -693,7 +721,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();
@@ -730,14 +758,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();
@@ -745,7 +773,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();
@@ -786,14 +814,14 @@ mod tests {
        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();
@@ -801,7 +829,7 @@ mod tests {
        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
-           .facets(std::iter::once("colour"))
+           .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();

@@ -1,3 +1,5 @@
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::ops::ControlFlow;

use heed::Result;
@@ -19,7 +21,7 @@ use crate::DocumentId;
///
/// The return value of the closure is a `ControlFlow<()>` which indicates whether we should
/// keep iterating over the different facet values or stop.
-pub fn iterate_over_facet_distribution<'t, CB>(
+pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
    field_id: u16,
@@ -29,7 +31,7 @@ pub fn iterate_over_facet_distribution<'t, CB>(
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
-   let mut fd = FacetDistribution { rtxn, db, field_id, callback };
+   let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
    let highest_level = get_highest_level(
        rtxn,
        db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
@@ -44,7 +46,102 @@ where
    }
}

-struct FacetDistribution<'t, CB>
pub fn count_iterate_over_facet_distribution<'t, CB>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
    field_id: u16,
    candidates: &RoaringBitmap,
    mut callback: CB,
) -> Result<()>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
    /// # Important
    /// The order of the fields determines the order in which the facet values will be returned.
    /// This struct is inserted in a BinaryHeap and popped later on.
    #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)]
    struct LevelEntry<'t> {
        /// The number of candidates in this entry.
        count: u64,
        /// The key level of the entry.
        level: Reverse<u8>,
        /// The left bound key.
        left_bound: &'t [u8],
        /// The number of keys we must look for after `left_bound`.
        group_size: u8,
        /// Any docid in the set of matching documents. Used to find the original facet string.
        any_docid: u32,
    }

    // Represents the list of keys that we must explore.
    let mut heap = BinaryHeap::new();
    let highest_level = get_highest_level(
        rtxn,
        db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
        field_id,
    )?;

    if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
        // We first fill the heap with values from the highest level
        let starting_key =
            FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
        for el in db.range(rtxn, &(&starting_key..))?.take(usize::MAX) {
            let (key, value) = el?;
            // The range is unbounded on the right and the group size for the highest level is MAX,
            // so we need to check that we are not iterating over the next field id
            if key.field_id != field_id {
                break;
            }
            let intersection = value.bitmap & candidates;
            let count = intersection.len();
            if count != 0 {
                heap.push(LevelEntry {
                    count,
                    level: Reverse(key.level),
                    left_bound: key.left_bound,
                    group_size: value.size,
                    any_docid: intersection.min().unwrap(),
                });
            }
        }

        while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop()
        {
            if let Reverse(0) = level {
                match (callback)(left_bound, count, any_docid)? {
                    ControlFlow::Continue(_) => (),
                    ControlFlow::Break(_) => return Ok(()),
                }
            } else {
                let starting_key = FacetGroupKey { field_id, level: level.0 - 1, left_bound };
                for el in db.range(rtxn, &(&starting_key..))?.take(group_size as usize) {
                    let (key, value) = el?;
                    // The range is unbounded on the right and the group size for the highest level is MAX,
                    // so we need to check that we are not iterating over the next field id
                    if key.field_id != field_id {
                        break;
                    }
                    let intersection = value.bitmap & candidates;
                    let count = intersection.len();
                    if count != 0 {
                        heap.push(LevelEntry {
                            count,
                            level: Reverse(key.level),
                            left_bound: key.left_bound,
                            group_size: value.size,
                            any_docid: intersection.min().unwrap(),
                        });
                    }
                }
            }
        }
    }

    Ok(())
}
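The heap pops entries by the derived `Ord`, i.e. largest `count` first, with `Reverse(level)` breaking ties so deeper (more precise) entries win; a level-0 leaf is emitted through the callback, anything else is split into its children and re-pushed. A minimal sketch of that best-first scheme over a hand-built tree (hypothetical, independent of LMDB):

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

// (count, Reverse(level), id): ordered by count first, then by *lower* level.
type Entry = (u64, Reverse<u8>, &'static str);

fn main() {
    let mut heap: BinaryHeap<Entry> = BinaryHeap::new();
    heap.push((7, Reverse(1), "group A")); // inner node covering two leaves
    heap.push((5, Reverse(0), "leaf x"));

    let mut emitted = Vec::new();
    while let Some((count, level, id)) = heap.pop() {
        if level == Reverse(0) {
            emitted.push((id, count)); // leaves come out in descending count order
        } else if id == "group A" {
            // splitting an inner node re-pushes its children with exact counts
            heap.push((6, Reverse(0), "leaf a1"));
            heap.push((1, Reverse(0), "leaf a2"));
        }
    }
    assert_eq!(emitted, vec![("leaf a1", 6), ("leaf x", 5), ("leaf a2", 1)]);
}
```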

+/// Iterate over the facets values by lexicographic order.
+struct LexicographicFacetDistribution<'t, CB>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
@@ -54,7 +151,7 @@ where
    callback: CB,
}

-impl<'t, CB> FacetDistribution<'t, CB>
+impl<'t, CB> LexicographicFacetDistribution<'t, CB>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
@@ -86,6 +183,7 @@ where
        }
        Ok(ControlFlow::Continue(()))
    }

    fn iterate(
        &mut self,
        candidates: &RoaringBitmap,
@@ -98,10 +196,10 @@ where
        }
        let starting_key =
            FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
-       let iter = self.db.range(self.rtxn, &(&starting_key..)).unwrap().take(group_size);
+       let iter = self.db.range(self.rtxn, &(&starting_key..))?.take(group_size);

        for el in iter {
-           let (key, value) = el.unwrap();
+           let (key, value) = el?;
            // The range is unbounded on the right and the group size for the highest level is MAX,
            // so we need to check that we are not iterating over the next field id
            if key.field_id != self.field_id {
@@ -116,7 +214,7 @@ where
                value.size as usize,
            )?;
            match cf {
-               ControlFlow::Continue(_) => {}
+               ControlFlow::Continue(_) => (),
                ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
            }
        }
@@ -132,7 +230,7 @@ mod tests {
    use heed::BytesDecode;
    use roaring::RoaringBitmap;

-   use super::iterate_over_facet_distribution;
+   use super::lexicographically_iterate_over_facet_distribution;
    use crate::heed_codec::facet::OrderedF64Codec;
    use crate::milli_snap;
    use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
@@ -144,7 +242,7 @@ mod tests {
        let txn = index.env.read_txn().unwrap();
        let candidates = (0..=255).collect::<RoaringBitmap>();
        let mut results = String::new();
-       iterate_over_facet_distribution(
+       lexicographically_iterate_over_facet_distribution(
            &txn,
            index.content,
            0,
@@ -161,6 +259,7 @@ mod tests {
            txn.commit().unwrap();
        }
    }

    #[test]
    fn filter_distribution_all_stop_early() {
        let indexes = [get_simple_index(), get_random_looking_index()];
@@ -169,7 +268,7 @@ mod tests {
        let candidates = (0..=255).collect::<RoaringBitmap>();
        let mut results = String::new();
        let mut nbr_facets = 0;
-       iterate_over_facet_distribution(
+       lexicographically_iterate_over_facet_distribution(
            &txn,
            index.content,
            0,

@@ -4,7 +4,7 @@ use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;

-pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
+pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
@@ -1,14 +1,21 @@
use std::fmt;

use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;

-pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
+pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{
-   execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
+   execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
+   Result, SearchContext, BEU16,
};

// Building these factories is not free.
@@ -16,19 +23,25 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

/// The maximum number of facets returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;

pub mod facet;
mod fst_utils;
pub mod new;

pub struct Search<'a> {
    query: Option<String>,
    vector: Option<Vec<f32>>,
    // this should be linked to the String in the query
    filter: Option<Filter<'a>>,
    offset: usize,
    limit: usize,
    sort_criteria: Option<Vec<AscDesc>>,
    searchable_attributes: Option<&'a [String]>,
    geo_strategy: new::GeoSortStrategy,
    terms_matching_strategy: TermsMatchingStrategy,
    scoring_strategy: ScoringStrategy,
    words_limit: usize,
    exhaustive_number_hits: bool,
    rtxn: &'a heed::RoTxn<'a>,
@@ -39,12 +52,15 @@ impl<'a> Search<'a> {
    pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
        Search {
            query: None,
            vector: None,
            filter: None,
            offset: 0,
            limit: 20,
            sort_criteria: None,
            searchable_attributes: None,
            geo_strategy: new::GeoSortStrategy::default(),
            terms_matching_strategy: TermsMatchingStrategy::default(),
            scoring_strategy: Default::default(),
            exhaustive_number_hits: false,
            words_limit: 10,
            rtxn,
@@ -57,6 +73,11 @@ impl<'a> Search<'a> {
        self
    }

    pub fn vector(&mut self, vector: impl Into<Vec<f32>>) -> &mut Search<'a> {
        self.vector = Some(vector.into());
        self
    }

    pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
        self.offset = offset;
        self
@@ -72,11 +93,21 @@ impl<'a> Search<'a> {
        self
    }

    pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
        self.searchable_attributes = Some(searchable);
        self
    }

    pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
        self.terms_matching_strategy = value;
        self
    }

    pub fn scoring_strategy(&mut self, value: ScoringStrategy) -> &mut Search<'a> {
        self.scoring_strategy = value;
        self
    }

    pub fn words_limit(&mut self, value: usize) -> &mut Search<'a> {
        self.words_limit = value;
        self
@@ -93,7 +124,7 @@ impl<'a> Search<'a> {
        self
    }

-   /// Force the search to exhastivelly compute the number of candidates,
+   /// Forces the search to exhaustively compute the number of candidates,
    /// this will increase the search time but allows finite pagination.
    pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
        self.exhaustive_number_hits = exhaustive_number_hits;
@@ -102,11 +133,18 @@ impl<'a> Search<'a> {

    pub fn execute(&self) -> Result<SearchResult> {
        let mut ctx = SearchContext::new(self.index, self.rtxn);
-       let PartialSearchResult { located_query_terms, candidates, documents_ids } =
+
+       if let Some(searchable_attributes) = self.searchable_attributes {
+           ctx.searchable_attributes(searchable_attributes)?;
+       }
+
+       let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
            execute_search(
                &mut ctx,
                &self.query,
+               &self.vector,
                self.terms_matching_strategy,
+               self.scoring_strategy,
                self.exhaustive_number_hits,
                &self.filter,
                &self.sort_criteria,
@@ -124,7 +162,7 @@ impl<'a> Search<'a> {
            None => MatchingWords::default(),
        };

-       Ok(SearchResult { matching_words, candidates, documents_ids })
+       Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
    }
}

@@ -132,12 +170,15 @@ impl fmt::Debug for Search<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Search {
            query,
            vector: _,
            filter,
            offset,
            limit,
            sort_criteria,
            searchable_attributes,
            geo_strategy: _,
            terms_matching_strategy,
            scoring_strategy,
            words_limit,
            exhaustive_number_hits,
            rtxn: _,
@@ -145,11 +186,14 @@ impl fmt::Debug for Search<'_> {
        } = self;
        f.debug_struct("Search")
            .field("query", query)
            .field("vector", &"[...]")
            .field("filter", filter)
            .field("offset", offset)
            .field("limit", limit)
            .field("sort_criteria", sort_criteria)
            .field("searchable_attributes", searchable_attributes)
            .field("terms_matching_strategy", terms_matching_strategy)
            .field("scoring_strategy", scoring_strategy)
            .field("exhaustive_number_hits", exhaustive_number_hits)
            .field("words_limit", words_limit)
            .finish()
@@ -160,8 +204,8 @@ impl fmt::Debug for Search<'_> {
pub struct SearchResult {
    pub matching_words: MatchingWords,
    pub candidates: RoaringBitmap,
-   // TODO those documents ids should be associated with their criteria scores.
    pub documents_ids: Vec<DocumentId>,
    pub document_scores: Vec<Vec<ScoreDetails>>,
}
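With the new `document_scores` field, a caller can derive the normalized score per returned document; a hedged sketch (not part of the diff) using the `ScoreDetails` helpers from `score_details.rs` above, where `documents_ids` and `document_scores` are parallel vectors:

```rust
// Sketch: pair each returned document with its normalized global score.
fn scores_by_docid(result: &SearchResult) -> Vec<(DocumentId, f64)> {
    result
        .documents_ids
        .iter()
        .zip(&result.document_scores)
        .map(|(&docid, details)| (docid, ScoreDetails::global_score(details.iter())))
        .collect()
}
```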

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -199,6 +243,195 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
    }
}

pub struct SearchForFacetValues<'a> {
    query: Option<String>,
    facet: String,
    search_query: Search<'a>,
}

impl<'a> SearchForFacetValues<'a> {
    pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
        SearchForFacetValues { query: None, facet, search_query }
    }

    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
        self.query = Some(query.into());
        self
    }

    fn one_original_value_of(
        &self,
        field_id: FieldId,
        facet_str: &str,
        any_docid: DocumentId,
    ) -> Result<Option<String>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;
        let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
        Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
    }

    pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
        let index = self.search_query.index;
        let rtxn = self.search_query.rtxn;

        let filterable_fields = index.filterable_fields(rtxn)?;
        if !filterable_fields.contains(&self.facet) {
            return Err(UserError::InvalidFacetSearchFacetName {
                field: self.facet.clone(),
                valid_fields: filterable_fields.into_iter().collect(),
            }
            .into());
        }

        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let fid = match fields_ids_map.id(&self.facet) {
            Some(fid) => fid,
            // we return an empty list of results when the attribute has been
            // set as filterable but no document contains this field (yet).
            None => return Ok(Vec::new()),
        };

        let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
            Some(fst) => fst,
            None => return Ok(vec![]),
        };

        let search_candidates = self.search_query.execute()?.candidates;

        match self.query.as_ref() {
            Some(query) => {
                let query = normalize_facet(query);
                let query = query.as_str();
                let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
                let field_authorizes_typos =
                    !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);

                if authorize_typos && field_authorizes_typos {
                    let mut results = vec![];

                    let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
                    if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
                        let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
                        if let Some(FacetGroupValue { bitmap, .. }) =
                            index.facet_id_string_docids.get(rtxn, &key)?
                        {
                            let count = search_candidates.intersection_len(&bitmap);
                            if count != 0 {
                                let value = self
                                    .one_original_value_of(fid, query, bitmap.min().unwrap())?
                                    .unwrap_or_else(|| query.to_string());
                                results.push(FacetValueHit { value, count });
                            }
                        }
                    } else {
                        let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
                        let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;

                        let is_prefix = true;
                        let automaton = if query.len() < one_typo as usize {
                            build_dfa(query, 0, is_prefix)
                        } else if query.len() < two_typos as usize {
                            build_dfa(query, 1, is_prefix)
                        } else {
                            build_dfa(query, 2, is_prefix)
                        };

                        let mut stream = fst.search(automaton).into_stream();
                        let mut length = 0;
                        while let Some(facet_value) = stream.next() {
                            let value = std::str::from_utf8(facet_value)?;
                            let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
                            let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
                                Some(FacetGroupValue { bitmap, .. }) => bitmap,
                                None => {
                                    error!(
                                        "the facet value is missing from the facet database: {key:?}"
                                    );
                                    continue;
                                }
                            };
                            let count = search_candidates.intersection_len(&docids);
                            if count != 0 {
                                let value = self
                                    .one_original_value_of(fid, value, docids.min().unwrap())?
                                    .unwrap_or_else(|| query.to_string());
                                results.push(FacetValueHit { value, count });
                                length += 1;
                            }
                            if length >= MAX_NUMBER_OF_FACETS {
                                break;
                            }
                        }
                    }

                    Ok(results)
                } else {
                    let automaton = Str::new(query).starts_with();
                    let mut stream = fst.search(automaton).into_stream();
                    let mut results = vec![];
                    let mut length = 0;
                    while let Some(facet_value) = stream.next() {
                        let value = std::str::from_utf8(facet_value)?;
                        let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
                        let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
                            Some(FacetGroupValue { bitmap, .. }) => bitmap,
                            None => {
                                error!(
                                    "the facet value is missing from the facet database: {key:?}"
                                );
                                continue;
                            }
                        };
                        let count = search_candidates.intersection_len(&docids);
                        if count != 0 {
                            let value = self
                                .one_original_value_of(fid, value, docids.min().unwrap())?
                                .unwrap_or_else(|| query.to_string());
                            results.push(FacetValueHit { value, count });
                            length += 1;
                        }
                        if length >= MAX_NUMBER_OF_FACETS {
                            break;
                        }
                    }

                    Ok(results)
                }
            }
            None => {
                let mut results = vec![];
                let mut length = 0;
                let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
result?;
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
pub struct FacetValueHit {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
#[allow(unused_imports)]
|
||||
|
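[Editor's note — illustrative sketch, not part of the diff] `SearchForFacetValues::execute` above picks a typo budget from the byte length of the normalized facet query before building a Levenshtein DFA over the facet fst. A minimal, self-contained sketch of that thresholding; `typo_budget` and the `5`/`9` thresholds below are illustrative assumptions, not milli API:

// `typo_budget` mirrors the `query.len() < one_typo / two_typos` cascade above.
fn typo_budget(query: &str, one_typo: usize, two_typos: usize) -> u8 {
    if query.len() < one_typo {
        0 // too short to tolerate any typo
    } else if query.len() < two_typos {
        1 // one typo allowed
    } else {
        2 // two typos allowed
    }
}

fn main() {
    assert_eq!(typo_budget("cat", 5, 9), 0);
    assert_eq!(typo_budget("guitar", 5, 9), 1);
    assert_eq!(typo_budget("skyscrapers", 5, 9), 2);
}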
@@ -3,14 +3,18 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::Result;

pub struct BucketSortOutput {
    pub docids: Vec<u32>,
    pub scores: Vec<Vec<ScoreDetails>>,
    pub all_candidates: RoaringBitmap,
}

// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
@@ -18,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    universe: &RoaringBitmap,
    from: usize,
    length: usize,
    scoring_strategy: ScoringStrategy,
    logger: &mut dyn SearchLogger<Q>,
) -> Result<BucketSortOutput> {
    logger.initial_query(query);
@@ -31,7 +36,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    };

    if universe.len() < from as u64 {
        return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() });
        return Ok(BucketSortOutput {
            docids: vec![],
            scores: vec![],
            all_candidates: universe.clone(),
        });
    }
    if ranking_rules.is_empty() {
        if let Some(distinct_fid) = distinct_fid {
@@ -49,22 +58,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            }
            let mut all_candidates = universe - excluded;
            all_candidates.extend(results.iter().copied());
            return Ok(BucketSortOutput { docids: results, all_candidates });
            return Ok(BucketSortOutput {
                scores: vec![Default::default(); results.len()],
                docids: results,
                all_candidates,
            });
        } else {
            let docids = universe.iter().skip(from).take(length).collect();
            return Ok(BucketSortOutput { docids, all_candidates: universe.clone() });
            let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
            return Ok(BucketSortOutput {
                scores: vec![Default::default(); docids.len()],
                docids,
                all_candidates: universe.clone(),
            });
        };
    }

    let ranking_rules_len = ranking_rules.len();

    logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);

    ranking_rules[0].start_iteration(ctx, logger, universe, query)?;

    let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];

    let mut ranking_rule_universes: Vec<RoaringBitmap> =
        vec![RoaringBitmap::default(); ranking_rules_len];
    ranking_rule_universes[0] = universe.clone();

    let mut cur_ranking_rule_index = 0;

    /// Finish iterating over the current ranking rule, yielding
@@ -89,11 +108,15 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            } else {
                cur_ranking_rule_index -= 1;
            }
            if ranking_rule_scores.len() > cur_ranking_rule_index {
                ranking_rule_scores.pop();
            }
        };
    }

    let mut all_candidates = universe.clone();
    let mut valid_docids = vec![];
    let mut valid_scores = vec![];
    let mut cur_offset = 0usize;

    macro_rules! maybe_add_to_results {
@@ -104,32 +127,44 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                length,
                logger,
                &mut valid_docids,
                &mut valid_scores,
                &mut all_candidates,
                &mut ranking_rule_universes,
                &mut ranking_rules,
                cur_ranking_rule_index,
                &mut cur_offset,
                distinct_fid,
                &ranking_rule_scores,
                $candidates,
            )?;
        };
    }

    while valid_docids.len() < length {
        // The universe for this bucket is zero or one element, so we don't need to sort
        // anything, just extend the results and go back to the parent ranking rule.
        if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
        // The universe for this bucket is zero, so we don't need to sort
        // anything, just go back to the parent ranking rule.
        if ranking_rule_universes[cur_ranking_rule_index].is_empty()
            || (scoring_strategy == ScoringStrategy::Skip
                && ranking_rule_universes[cur_ranking_rule_index].len() == 1)
        {
            let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
            maybe_add_to_results!(bucket);
            back!();
            continue;
        }

        let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &ranking_rule_universes[cur_ranking_rule_index])? else {
        let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
            ctx,
            logger,
            &ranking_rule_universes[cur_ranking_rule_index],
        )?
        else {
            back!();
            continue;
        };

        ranking_rule_scores.push(next_bucket.score);

        logger.next_bucket_ranking_rule(
            cur_ranking_rule_index,
            ranking_rules[cur_ranking_rule_index].as_ref(),
@@ -143,10 +178,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;

        if cur_ranking_rule_index == ranking_rules_len - 1
            || next_bucket.candidates.len() <= 1
            || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
            || cur_offset + (next_bucket.candidates.len() as usize) < from
        {
            maybe_add_to_results!(next_bucket.candidates);
            ranking_rule_scores.pop();
            continue;
        }

@@ -166,7 +202,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        )?;
    }

    Ok(BucketSortOutput { docids: valid_docids, all_candidates })
    Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
}

/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
@@ -179,14 +215,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
    logger: &mut dyn SearchLogger<Q>,

    valid_docids: &mut Vec<u32>,
    valid_scores: &mut Vec<Vec<ScoreDetails>>,
    all_candidates: &mut RoaringBitmap,

    ranking_rule_universes: &mut [RoaringBitmap],
    ranking_rules: &mut [BoxRankingRule<'ctx, Q>],

    cur_ranking_rule_index: usize,

    cur_offset: &mut usize,

    distinct_fid: Option<u16>,
    ranking_rule_scores: &[ScoreDetails],
    candidates: RoaringBitmap,
) -> Result<()> {
    // First apply the distinct rule on the candidates, reducing the universes if necessary
@@ -231,13 +271,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
            let candidates =
                candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
            logger.add_to_results(&candidates);
            valid_docids.extend(&candidates);
            valid_docids.extend_from_slice(&candidates);
            valid_scores
                .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
        }
    } else {
        // if we have passed the offset already, add some of the documents (up to the limit)
        let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
        logger.add_to_results(&candidates);
        valid_docids.extend(&candidates);
        valid_docids.extend_from_slice(&candidates);
        valid_scores
            .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
    }

    *cur_offset += candidates.len() as usize;
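[Editor's note — illustrative sketch, not part of the diff] The change above makes every document emitted from a bucket carry a copy of the score stack accumulated by the ranking rules so far, which is why `valid_scores` grows in lockstep with `valid_docids`. A minimal sketch under the assumption that a score is just a `Vec<u32>` (the real type is `Vec<ScoreDetails>`):

// `Score` is an illustrative placeholder for `Vec<ScoreDetails>`.
type Score = Vec<u32>;

fn push_bucket(
    valid_docids: &mut Vec<u32>,
    valid_scores: &mut Vec<Score>,
    rule_scores: &[u32],
    bucket: &[u32],
    remaining: usize,
) {
    let taken: Vec<u32> = bucket.iter().copied().take(remaining).collect();
    valid_docids.extend_from_slice(&taken);
    // every document of the bucket shares the same per-rule scores
    valid_scores.extend(std::iter::repeat(rule_scores.to_owned()).take(taken.len()));
}

fn main() {
    let (mut ids, mut scores) = (Vec::new(), Vec::new());
    push_bucket(&mut ids, &mut scores, &[3, 7], &[10, 11, 12], 2);
    assert_eq!(ids, vec![10, 11]);
    assert_eq!(scores, vec![vec![3, 7], vec![3, 7]]);
}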
@@ -4,12 +4,13 @@ use std::hash::Hash;

use fxhash::FxHashMap;
use heed::types::ByteSlice;
use heed::{BytesDecode, BytesEncode, Database, RoTxn};
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;

use super::interner::Interned;
use super::Word;
use crate::heed_codec::StrBEU16Codec;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
};
@@ -22,50 +23,104 @@ use crate::{
#[derive(Default)]
pub struct DatabaseCache<'ctx> {
    pub word_pair_proximity_docids:
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
    pub word_prefix_pair_proximity_docids:
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
    pub prefix_word_pair_proximity_docids:
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
    pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
    pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
    pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
    pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
    pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
    pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
    pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
    pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,

    pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
    pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
    pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
    pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
    pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,

    pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
    pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
    pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
    pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
}
impl<'ctx> DatabaseCache<'ctx> {
    fn get_value<'v, K1, KC>(
    fn get_value<'v, K1, KC, DC>(
        txn: &'ctx RoTxn,
        cache_key: K1,
        db_key: &'v KC::EItem,
        cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>,
        cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
        db: Database<KC, ByteSlice>,
    ) -> Result<Option<&'ctx [u8]>>
    ) -> Result<Option<DC::DItem>>
    where
        K1: Copy + Eq + Hash,
        KC: BytesEncode<'v>,
        DC: BytesDecodeOwned,
    {
        let bitmap_ptr = match cache.entry(cache_key) {
            Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
            Entry::Vacant(entry) => {
                let bitmap_ptr = db.get(txn, db_key)?;
                entry.insert(bitmap_ptr);
                bitmap_ptr
        if let Entry::Vacant(entry) = cache.entry(cache_key) {
            let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
            entry.insert(bitmap_ptr);
        }

        match cache.get(&cache_key).unwrap() {
            Some(Cow::Borrowed(bytes)) => {
                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
            }
        };
        Ok(bitmap_ptr)
            Some(Cow::Owned(bytes)) => {
                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
            }
            None => Ok(None),
        }
    }

    fn get_value_from_keys<'v, K1, KC, DC>(
        txn: &'ctx RoTxn,
        cache_key: K1,
        db_keys: &'v [KC::EItem],
        cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
        db: Database<KC, ByteSlice>,
        merger: MergeFn,
    ) -> Result<Option<DC::DItem>>
    where
        K1: Copy + Eq + Hash,
        KC: BytesEncode<'v>,
        DC: BytesDecodeOwned,
        KC::EItem: Sized,
    {
        if let Entry::Vacant(entry) = cache.entry(cache_key) {
            let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
                [] => None,
                [key] => db.get(txn, key)?.map(Cow::Borrowed),
                keys => {
                    let bitmaps = keys
                        .iter()
                        .filter_map(|key| db.get(txn, key).transpose())
                        .map(|v| v.map(Cow::Borrowed))
                        .collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;

                    if bitmaps.is_empty() {
                        None
                    } else {
                        Some(merger(&[], &bitmaps[..])?)
                    }
                }
            };

            entry.insert(bitmap_ptr);
        }

        match cache.get(&cache_key).unwrap() {
            Some(Cow::Borrowed(bytes)) => {
                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
            }
            Some(Cow::Owned(bytes)) => {
                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
            }
            None => Ok(None),
        }
    }
}

impl<'ctx> SearchContext<'ctx> {
    pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
        if let Some(fst) = self.db_cache.words_fst.clone() {
@@ -99,30 +154,41 @@ impl<'ctx> SearchContext<'ctx> {

    /// Retrieve or insert the given value in the `word_docids` database.
    fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
            self.txn,
            word,
            self.word_interner.get(word).as_str(),
            &mut self.db_cache.word_docids,
            self.index.word_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        match &self.restricted_fids {
            Some(restricted_fids) => {
                let interned = self.word_interner.get(word).as_str();
                let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();

                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
                    self.txn,
                    word,
                    &keys[..],
                    &mut self.db_cache.word_docids,
                    self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
                    merge_cbo_roaring_bitmaps,
                )
            }
            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
                self.txn,
                word,
                self.word_interner.get(word).as_str(),
                &mut self.db_cache.word_docids,
                self.index.word_docids.remap_data_type::<ByteSlice>(),
            ),
        }
    }

    fn get_db_exact_word_docids(
        &mut self,
        word: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
            self.txn,
            word,
            self.word_interner.get(word).as_str(),
            &mut self.db_cache.exact_word_docids,
            self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@@ -150,30 +216,41 @@ impl<'ctx> SearchContext<'ctx> {
        &mut self,
        prefix: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
            self.txn,
            prefix,
            self.word_interner.get(prefix).as_str(),
            &mut self.db_cache.word_prefix_docids,
            self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        match &self.restricted_fids {
            Some(restricted_fids) => {
                let interned = self.word_interner.get(prefix).as_str();
                let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();

                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
                    self.txn,
                    prefix,
                    &keys[..],
                    &mut self.db_cache.word_prefix_docids,
                    self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
                    merge_cbo_roaring_bitmaps,
                )
            }
            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
                self.txn,
                prefix,
                self.word_interner.get(prefix).as_str(),
                &mut self.db_cache.word_prefix_docids,
                self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
            ),
        }
    }

    fn get_db_exact_word_prefix_docids(
        &mut self,
        prefix: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
            self.txn,
            prefix,
            self.word_interner.get(prefix).as_str(),
            &mut self.db_cache.exact_word_prefix_docids,
            self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_pair_proximity_docids(
@@ -182,7 +259,7 @@ impl<'ctx> SearchContext<'ctx> {
        word2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (proximity, word1, word2),
            &(
@@ -192,9 +269,7 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_pair_proximity_docids,
            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_pair_proximity_docids_len(
@@ -203,7 +278,7 @@ impl<'ctx> SearchContext<'ctx> {
        word2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<u64>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
            self.txn,
            (proximity, word1, word2),
            &(
@@ -213,11 +288,7 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_pair_proximity_docids,
            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| {
            CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())
        })
        .transpose()
        )
    }

    pub fn get_db_word_prefix_pair_proximity_docids(
@@ -226,7 +297,7 @@ impl<'ctx> SearchContext<'ctx> {
        prefix2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (proximity, word1, prefix2),
            &(
@@ -236,9 +307,7 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_prefix_pair_proximity_docids,
            self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }
    pub fn get_db_prefix_word_pair_proximity_docids(
        &mut self,
@@ -246,7 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
        right: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (proximity, left_prefix, right),
            &(
@@ -256,9 +325,7 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.prefix_word_pair_proximity_docids,
            self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_fid_docids(
@@ -266,15 +333,18 @@ impl<'ctx> SearchContext<'ctx> {
        word: Interned<String>,
        fid: u16,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        // if the requested fid isn't in the restricted list, return None.
        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
            return Ok(None);
        }

        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (word, fid),
            &(self.word_interner.get(word).as_str(), fid),
            &mut self.db_cache.word_fid_docids,
            self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_prefix_fid_docids(
@@ -282,15 +352,18 @@ impl<'ctx> SearchContext<'ctx> {
        word_prefix: Interned<String>,
        fid: u16,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        // if the requested fid isn't in the restricted list, return None.
        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
            return Ok(None);
        }

        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (word_prefix, fid),
            &(self.word_interner.get(word_prefix).as_str(), fid),
            &mut self.db_cache.word_prefix_fid_docids,
            self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -309,7 +382,7 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, fid), value) = result?;
                    // filling other caches to avoid searching for them again
                    self.db_cache.word_fid_docids.insert((word, fid), Some(value));
                    self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value)));
                    fids.push(fid);
                }
                entry.insert(fids.clone());
@@ -335,7 +408,9 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, fid), value) = result?;
                    // filling other caches to avoid searching for them again
                    self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value));
                    self.db_cache
                        .word_prefix_fid_docids
                        .insert((word_prefix, fid), Some(Cow::Borrowed(value)));
                    fids.push(fid);
                }
                entry.insert(fids.clone());
@@ -350,15 +425,13 @@ impl<'ctx> SearchContext<'ctx> {
        word: Interned<String>,
        position: u16,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (word, position),
            &(self.word_interner.get(word).as_str(), position),
            &mut self.db_cache.word_position_docids,
            self.index.word_position_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_prefix_position_docids(
@@ -366,15 +439,13 @@ impl<'ctx> SearchContext<'ctx> {
        word_prefix: Interned<String>,
        position: u16,
    ) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            (word_prefix, position),
            &(self.word_interner.get(word_prefix).as_str(), position),
            &mut self.db_cache.word_prefix_position_docids,
            self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
        )?
        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
        .transpose()
        )
    }

    pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -393,7 +464,9 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, position), value) = result?;
                    // filling other caches to avoid searching for them again
                    self.db_cache.word_position_docids.insert((word, position), Some(value));
                    self.db_cache
                        .word_position_docids
                        .insert((word, position), Some(Cow::Borrowed(value)));
                    positions.push(position);
                }
                entry.insert(positions.clone());
@@ -424,7 +497,7 @@ impl<'ctx> SearchContext<'ctx> {
                    // filling other caches to avoid searching for them again
                    self.db_cache
                        .word_prefix_position_docids
                        .insert((word_prefix, position), Some(value));
                        .insert((word_prefix, position), Some(Cow::Borrowed(value)));
                    positions.push(position);
                }
                entry.insert(positions.clone());
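[Editor's note — illustrative sketch, not part of the diff] `DatabaseCache::get_value` above now stores `Cow<'ctx, [u8]>` so single-key lookups keep borrowing from LMDB while merged multi-key lookups can own their bytes. A minimal read-through cache showing the same pattern; `fetch` is a hypothetical stand-in for the `db.get` call:

use std::borrow::Cow;
use std::collections::hash_map::{Entry, HashMap};

fn get_cached<'ctx>(
    cache: &mut HashMap<u32, Option<Cow<'ctx, [u8]>>>,
    key: u32,
    fetch: impl FnOnce() -> Option<&'ctx [u8]>,
) -> Option<Cow<'ctx, [u8]>> {
    // only hit the backing store on a cache miss
    if let Entry::Vacant(entry) = cache.entry(key) {
        entry.insert(fetch().map(Cow::Borrowed));
    }
    cache.get(&key).unwrap().clone()
}

fn main() {
    static BYTES: &[u8] = &[1, 2, 3];
    let mut cache = HashMap::new();
    // first call fetches, second call is served from the cache
    assert_eq!(get_cached(&mut cache, 0, || Some(BYTES)).as_deref(), Some(&BYTES[..]));
    assert_eq!(get_cached(&mut cache, 0, || unreachable!()).as_deref(), Some(&BYTES[..]));
}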
@@ -2,6 +2,7 @@ use roaring::{MultiOps, RoaringBitmap};

use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{Result, SearchContext, SearchLogger};
@@ -244,7 +245,13 @@ impl State {
                candidates &= universe;
                (
                    State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
                    Some(RankingRuleOutput { query: query_graph, candidates }),
                    Some(RankingRuleOutput {
                        query: query_graph,
                        candidates,
                        score: ScoreDetails::ExactAttribute(
                            score_details::ExactAttribute::ExactMatch,
                        ),
                    }),
                )
            }
            State::AttributeStarts(query_graph, candidates_per_attribute) => {
@@ -257,12 +264,24 @@ impl State {
                candidates &= universe;
                (
                    State::Empty(query_graph.clone()),
                    Some(RankingRuleOutput { query: query_graph, candidates }),
                    Some(RankingRuleOutput {
                        query: query_graph,
                        candidates,
                        score: ScoreDetails::ExactAttribute(
                            score_details::ExactAttribute::MatchesStart,
                        ),
                    }),
                )
            }
            State::Empty(query_graph) => (
                State::Empty(query_graph.clone()),
                Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
                Some(RankingRuleOutput {
                    query: query_graph,
                    candidates: universe.clone(),
                    score: ScoreDetails::ExactAttribute(
                        score_details::ExactAttribute::NoExactMatch,
                    ),
                }),
            ),
        };
        (state, output)
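[Editor's note — illustrative sketch, not part of the diff] The hunk above attaches one of three `ExactAttribute` score variants to each bucket. A minimal sketch of the ordering those variants are presumably meant to express when scores are compared (the discriminants and derives here are assumptions, not the milli implementation):

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ExactAttribute {
    NoExactMatch = 0, // no attribute matches the query exactly
    MatchesStart = 1, // an attribute starts with the query
    ExactMatch = 2,   // an attribute is exactly the query
}

fn main() {
    assert!(ExactAttribute::ExactMatch > ExactAttribute::MatchesStart);
    assert!(ExactAttribute::MatchesStart > ExactAttribute::NoExactMatch);
}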
@@ -8,6 +8,7 @@ use rstar::RTree;

use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::score_details::{self, ScoreDetails};
use crate::{
    distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
    SearchLogger,
@@ -80,7 +81,7 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
    field_ids: Option<[u16; 2]>,
    rtree: Option<RTree<GeoPoint>>,

    cached_sorted_docids: VecDeque<u32>,
    cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
    geo_candidates: RoaringBitmap,
}

@@ -130,7 +131,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
                let point = lat_lng_to_xyz(&self.point);
                for point in rtree.nearest_neighbor_iter(&point) {
                    if self.geo_candidates.contains(point.data.0) {
                        self.cached_sorted_docids.push_back(point.data.0);
                        self.cached_sorted_docids.push_back(point.data);
                        if self.cached_sorted_docids.len() >= cache_size {
                            break;
                        }
@@ -142,7 +143,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
                let point = lat_lng_to_xyz(&opposite_of(self.point));
                for point in rtree.nearest_neighbor_iter(&point) {
                    if self.geo_candidates.contains(point.data.0) {
                        self.cached_sorted_docids.push_front(point.data.0);
                        self.cached_sorted_docids.push_front(point.data);
                        if self.cached_sorted_docids.len() >= cache_size {
                            break;
                        }
@@ -177,7 +178,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
            // computing the distance between two points is expensive thus we cache the result
            documents
                .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
            self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
            self.cached_sorted_docids.extend(documents.into_iter());
        };

        Ok(())
@@ -220,12 +221,19 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
        logger: &mut dyn SearchLogger<Q>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<Q>>> {
        assert!(universe.len() > 1);
        let query = self.query.as_ref().unwrap().clone();
        self.geo_candidates &= universe;

        if self.geo_candidates.is_empty() {
            return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }));
            return Ok(Some(RankingRuleOutput {
                query,
                candidates: universe.clone(),
                score: ScoreDetails::GeoSort(score_details::GeoSort {
                    target_point: self.point,
                    ascending: self.ascending,
                    value: None,
                }),
            }));
        }

        let ascending = self.ascending;
@@ -236,11 +244,16 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
                cache.pop_back()
            }
        };
        while let Some(id) = next(&mut self.cached_sorted_docids) {
        while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
            if self.geo_candidates.contains(id) {
                return Ok(Some(RankingRuleOutput {
                    query,
                    candidates: RoaringBitmap::from_iter([id]),
                    score: ScoreDetails::GeoSort(score_details::GeoSort {
                        target_point: self.point,
                        ascending: self.ascending,
                        value: Some(point),
                    }),
                }));
            }
        }
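[Editor's note — illustrative sketch, not part of the diff] The geo-sort cache above now keeps the coordinates next to each document id, so the single-document bucket can report the matched point in its score without a second lookup. A minimal sketch of that loop; `next_geo_bucket` is a hypothetical helper:

use std::collections::VecDeque;

use roaring::RoaringBitmap;

fn next_geo_bucket(
    cache: &mut VecDeque<(u32, [f64; 2])>,
    candidates: &RoaringBitmap,
) -> Option<(u32, [f64; 2])> {
    while let Some((id, point)) = cache.pop_front() {
        if candidates.contains(id) {
            // a one-document bucket together with the coordinates used as its score value
            return Some((id, point));
        }
    }
    None
}

fn main() {
    let mut cache = VecDeque::from([(1, [48.86, 2.35]), (2, [45.76, 4.83])]);
    let candidates = RoaringBitmap::from_iter([2u32]);
    assert_eq!(next_geo_bucket(&mut cache, &candidates), Some((2, [45.76, 4.83])));
}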
@@ -50,6 +50,7 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
@@ -118,6 +119,8 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
    all_costs: MappedInterner<QueryNode, Vec<u64>>,
    /// An index in the first element of `all_distances`, giving the cost of the next bucket
    cur_cost: u64,
    /// One above the highest possible cost for this rule
    next_max_cost: u64,
}

impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
@@ -131,7 +134,20 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
        _universe: &RoaringBitmap,
        query_graph: &QueryGraph,
    ) -> Result<()> {
        // the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
        //
        // When there is a matching strategy, it also factors the additional costs of:
        // 1. The words that are matched in phrases
        // 2. Skipping words (by adding them to the paths with a cost)
        let mut next_max_cost = 1;
        let removal_cost = if let Some(terms_matching_strategy) = self.terms_matching_strategy {
            // add the cost of the phrase to the next_max_cost
            next_max_cost += query_graph
                .words_in_phrases_count(ctx)
                // remove 1 from the words in phrases count, because when there is a phrase we can now have a document
                // where only the phrase is matching, and none of the non-phrase words.
                // With the `1` that `next_max_cost` is initialized with, this gets counted twice.
                .saturating_sub(1) as u64;
            match terms_matching_strategy {
                TermsMatchingStrategy::Last => {
                    let removal_order =
@@ -139,13 +155,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
                    let mut forbidden_nodes =
                        SmallBitmap::for_interned_values_in(&query_graph.nodes);
                    let mut costs = query_graph.nodes.map(|_| None);
                    let mut cost = 100;
                    // FIXME: this works because only words uses termsmatchingstrategy at the moment.
                    for ns in removal_order {
                        for n in ns.iter() {
                            *costs.get_mut(n) = Some((cost, forbidden_nodes.clone()));
                            *costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
                        }
                        forbidden_nodes.union(&ns);
                        cost += 100;
                    }
                    costs
                }
@@ -162,12 +177,16 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
        // Then pre-compute the cost of all paths from each node to the end node
        let all_costs = graph.find_all_costs_to_end();

        next_max_cost +=
            all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0);

        let state = GraphBasedRankingRuleState {
            graph,
            conditions_cache: condition_docids_cache,
            dead_ends_cache,
            all_costs,
            cur_cost: 0,
            next_max_cost,
        };

        self.state = Some(state);
@@ -181,21 +200,15 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
        logger: &mut dyn SearchLogger<QueryGraph>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
        // If universe.len() <= 1, the bucket sort algorithm
        // should not have called this function.
        assert!(universe.len() > 1);
        // Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
        // should never happen
        let mut state = self.state.take().unwrap();

        let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
        // Retrieve the cost of the paths to compute
        let Some(&cost) = state
            .all_costs
            .get(state.graph.query_graph.root_node)
            .iter()
            .find(|c| **c >= state.cur_cost) else {
            self.state = None;
            return Ok(None);
        let Some(&cost) = all_costs.iter().find(|c| **c >= state.cur_cost) else {
            self.state = None;
            return Ok(None);
        };
        state.cur_cost = cost + 1;

@@ -207,8 +220,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
            dead_ends_cache,
            all_costs,
            cur_cost: _,
            next_max_cost,
        } = &mut state;

        let rank = *next_max_cost - cost;
        let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });

        let mut universe = universe.clone();

        let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
@@ -295,8 +312,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase

        // We modify the next query graph so that it only contains the subgraph
        // that was used to compute this bucket
        // But we only do it in case the bucket length is >1, because otherwise
        // we know the child ranking rule won't be called anyway

        let paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>> = good_paths
            .into_iter()
@@ -325,7 +340,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase

        self.state = Some(state);

        Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
        Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
    }

    fn end_iteration(
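[Editor's note — illustrative sketch, not part of the diff] In `next_bucket` above, a path cost is turned into a rank by subtracting it from `next_max_cost`, so cheaper paths rank higher. A minimal sketch; `Rank` here is a stand-in for `crate::score_details::Rank`:

#[derive(Debug, PartialEq)]
struct Rank {
    rank: u32,
    max_rank: u32,
}

fn rank_for_cost(cost: u64, next_max_cost: u64) -> Rank {
    // mirrors `let rank = *next_max_cost - cost;` in the diff
    Rank { rank: (next_max_cost - cost) as u32, max_rank: next_max_cost as u32 }
}

fn main() {
    // the cheapest bucket (cost 0) gets the maximal rank
    assert_eq!(rank_for_cost(0, 10), Rank { rank: 10, max_rank: 10 });
    assert_eq!(rank_for_cost(9, 10), Rank { rank: 1, max_rank: 10 });
}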
@@ -80,7 +80,9 @@ impl MatchingWords {
                let word = self.word_interner.get(*word);
                // if the word is a prefix we match using starts_with.
                if located_words.is_prefix && token.lemma().starts_with(word) {
                    let Some((char_index, c)) = word.char_indices().take(located_words.original_char_count).last() else {
                    let Some((char_index, c)) =
                        word.char_indices().take(located_words.original_char_count).last()
                    else {
                        continue;
                    };
                    let prefix_length = char_index + c.len_utf8();
@@ -256,7 +258,8 @@ pub(crate) mod tests {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
        let mut ctx = SearchContext::new(&temp_index, &rtxn);
        let tokenizer = TokenizerBuilder::new().build();
        let mut builder = TokenizerBuilder::default();
        let tokenizer = builder.build();
        let tokens = tokenizer.tokenize("split this world");
        let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
        let matching_words = MatchingWords::new(ctx, query_terms);
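[Editor's note — illustrative sketch, not part of the diff] The reformatted `let ... else` above computes the byte length of the first `original_char_count` characters, so the highlight never slices inside a UTF-8 code point. The same computation as a small free function:

fn prefix_byte_len(word: &str, original_char_count: usize) -> Option<usize> {
    // last char among the first `original_char_count` ones, with its byte offset
    let (char_index, c) = word.char_indices().take(original_char_count).last()?;
    Some(char_index + c.len_utf8())
}

fn main() {
    assert_eq!(prefix_byte_len("héllo", 2), Some(3)); // 'h' (1 byte) + 'é' (2 bytes)
    assert_eq!(prefix_byte_len("", 2), None);
}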
@@ -12,16 +12,16 @@ const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";

/// Structure used to build a Matcher allowing to customize formating tags.
pub struct MatcherBuilder<'a, A> {
pub struct MatcherBuilder<'m> {
    matching_words: MatchingWords,
    tokenizer: Tokenizer<'a, 'a, A>,
    tokenizer: Tokenizer<'m>,
    crop_marker: Option<String>,
    highlight_prefix: Option<String>,
    highlight_suffix: Option<String>,
}

impl<'a, A> MatcherBuilder<'a, A> {
    pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, 'a, A>) -> Self {
impl<'m> MatcherBuilder<'m> {
    pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'m>) -> Self {
        Self {
            matching_words,
            tokenizer,
@@ -46,7 +46,7 @@ impl<'a, A> MatcherBuilder<'a, A> {
        self
    }

    pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> {
    pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> {
        let crop_marker = match &self.crop_marker {
            Some(marker) => marker.as_str(),
            None => DEFAULT_CROP_MARKER,
@@ -103,17 +103,17 @@ pub struct MatchBounds {

/// Structure used to analize a string, compute words that match,
/// and format the source string, returning a highlighted and cropped sub-string.
pub struct Matcher<'t, 'm, A> {
pub struct Matcher<'t, 'm> {
    text: &'t str,
    matching_words: &'m MatchingWords,
    tokenizer: &'m Tokenizer<'m, 'm, A>,
    tokenizer: &'m Tokenizer<'m>,
    crop_marker: &'m str,
    highlight_prefix: &'m str,
    highlight_suffix: &'m str,
    matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
}

impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
impl<'t> Matcher<'t, '_> {
    /// Iterates over tokens and save any of them that matches the query.
    fn compute_matches(&mut self) -> &mut Self {
        /// some words are counted as matches only if they are close together and in the good order,
@@ -503,13 +503,15 @@ mod tests {
    use crate::index::tests::TempIndex;
    use crate::{execute_search, SearchContext};

    impl<'a> MatcherBuilder<'a, &[u8]> {
    impl<'a> MatcherBuilder<'a> {
        fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
            let mut ctx = SearchContext::new(index, rtxn);
            let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
                &mut ctx,
                &Some(query.to_string()),
                &None,
                crate::TermsMatchingStrategy::default(),
                crate::score_details::ScoringStrategy::Skip,
                false,
                &None,
                &None,
@@ -528,7 +530,7 @@ mod tests {
                None => MatchingWords::default(),
            };

            MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
            MatcherBuilder::new(matching_words, TokenizerBuilder::default().into_tokenizer())
        }
    }

@@ -688,7 +690,7 @@ mod tests {
        // should crop the phrase instead of croping around the match.
        insta::assert_snapshot!(
            matcher.format(format_options),
            @"… Split The World is a book written by Emily Henry…"
            @"…Split The World is a book written by Emily Henry…"
        );

        // Text containing some matches.
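[Editor's note — illustrative sketch, not part of the diff] The `MatcherBuilder` change above collapses the tokenizer's two lifetimes and byte-source parameter `A` into a single `'m`, so `build` only introduces the lifetime `'t` of the text being formatted. A minimal, dependency-free sketch of that lifetime shape:

struct Builder<'m> {
    marker: &'m str,
}

struct Formatter<'t, 'm> {
    text: &'t str,
    marker: &'m str,
}

impl<'m> Builder<'m> {
    // the builder outlives the formatter; the text only needs to live for 't
    fn build<'t>(&'m self, text: &'t str) -> Formatter<'t, 'm> {
        Formatter { text, marker: self.marker }
    }
}

fn main() {
    let builder = Builder { marker: "…" };
    let formatted = builder.build("some text");
    assert_eq!((formatted.text, formatted.marker), ("some text", "…"));
}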
@@ -20,7 +20,7 @@ mod sort;
#[cfg(test)]
mod tests;

use std::collections::HashSet;
use std::collections::{BTreeSet, HashSet};

use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
@@ -28,6 +28,7 @@ use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use hnsw::Searcher;
use interner::{DedupInterner, Interner};
pub use logger::visual::VisualSearchLogger;
pub use logger::{DefaultSearchLogger, SearchLogger};
@@ -39,13 +40,19 @@ use ranking_rules::{
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use space::Neighbor;

use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
use crate::{
    normalize_vector, AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy,
    UserError, BEU32,
};

/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> {
@@ -56,6 +63,7 @@ pub struct SearchContext<'ctx> {
    pub phrase_interner: DedupInterner<Phrase>,
    pub term_interner: Interner<QueryTerm>,
    pub phrase_docids: PhraseDocIdsCache,
    pub restricted_fids: Option<Vec<u16>>,
}

impl<'ctx> SearchContext<'ctx> {
@@ -68,8 +76,66 @@ impl<'ctx> SearchContext<'ctx> {
            phrase_interner: <_>::default(),
            term_interner: <_>::default(),
            phrase_docids: <_>::default(),
            restricted_fids: None,
        }
    }

    pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
        let fids_map = self.index.fields_ids_map(self.txn)?;
        let searchable_names = self.index.searchable_fields(self.txn)?;

        let mut restricted_fids = Vec::new();
        for field_name in searchable_attributes {
            let searchable_contains_name =
                searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
            let fid = match (fids_map.id(field_name), searchable_contains_name) {
                // The Field id exist and the field is searchable
                (Some(fid), Some(true)) | (Some(fid), None) => fid,
                // The field is searchable but the Field id doesn't exist => Internal Error
                (None, Some(true)) => {
                    return Err(FieldIdMapMissingEntry::FieldName {
                        field_name: field_name.to_string(),
                        process: "search",
                    }
                    .into())
                }
                // The field is not searchable => User error
                _otherwise => {
                    let mut valid_fields: BTreeSet<_> =
                        fids_map.names().map(String::from).collect();

                    // Filter by the searchable names
                    if let Some(sn) = searchable_names {
                        let searchable_names = sn.iter().map(|s| s.to_string()).collect();
                        valid_fields = &valid_fields & &searchable_names;
                    }

                    let searchable_count = valid_fields.len();

                    // Remove hidden fields
                    if let Some(dn) = self.index.displayed_fields(self.txn)? {
                        let displayable_names = dn.iter().map(|s| s.to_string()).collect();
                        valid_fields = &valid_fields & &displayable_names;
                    }

                    let hidden_fields = searchable_count > valid_fields.len();
                    let field = field_name.to_string();
                    return Err(UserError::InvalidSearchableAttribute {
                        field,
                        valid_fields,
                        hidden_fields,
                    }
                    .into());
                }
            };

            restricted_fids.push(fid);
        }

        self.restricted_fids = Some(restricted_fids);

        Ok(())
    }
}

#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
@@ -349,7 +415,9 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
pub fn execute_search(
    ctx: &mut SearchContext,
    query: &Option<String>,
    vector: &Option<Vec<f32>>,
    terms_matching_strategy: TermsMatchingStrategy,
    scoring_strategy: ScoringStrategy,
    exhaustive_number_hits: bool,
    filters: &Option<Filter>,
    sort_criteria: &Option<Vec<AscDesc>>,
@@ -368,8 +436,40 @@ pub fn execute_search(

    check_sort_criteria(ctx, sort_criteria.as_ref())?;

    let mut located_query_terms = None;
    if let Some(vector) = vector {
        let mut searcher = Searcher::new();
        let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
        let ef = hnsw.len().min(100);
        let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
        let vector = normalize_vector(vector.clone());
        let neighbors = hnsw.nearest(&vector, ef, &mut searcher, &mut dest[..]);

        let mut docids = Vec::new();
        let mut uniq_docids = RoaringBitmap::new();
        for Neighbor { index, distance: _ } in neighbors.iter() {
            let index = BEU32::new(*index as u32);
            let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
            if universe.contains(docid) && uniq_docids.insert(docid) {
                docids.push(docid);
                if docids.len() == (from + length) {
                    break;
                }
            }
        }

        // return the nearest documents that are also part of the candidates
        // along with a dummy list of scores that are useless in this context.
        let docids: Vec<_> = docids.into_iter().skip(from).take(length).collect();

        return Ok(PartialSearchResult {
            candidates: universe,
            document_scores: vec![Vec::new(); docids.len()],
            documents_ids: docids,
            located_query_terms: None,
        });
    }

    let mut located_query_terms = None;
    let query_terms = if let Some(query) = query {
        // We make sure that the analyzer is aware of the stop words
        // this ensures that the query builder is able to properly remove them.
@@ -379,6 +479,20 @@ pub fn execute_search(
            tokbuilder.stop_words(stop_words);
        }

        let separators = ctx.index.allowed_separators(ctx.txn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref separators) = separators {
            tokbuilder.separators(separators);
        }

        let dictionary = ctx.index.dictionary(ctx.txn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref dictionary) = dictionary {
            tokbuilder.words_dict(dictionary);
        }

        let script_lang_map = ctx.index.script_language(ctx.txn)?;
        if !script_lang_map.is_empty() {
            tokbuilder.allow_list(&script_lang_map);
@@ -411,7 +525,16 @@ pub fn execute_search(
        universe =
            resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;

        bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
        bucket_sort(
            ctx,
            ranking_rules,
            &graph,
            &universe,
            from,
            length,
            scoring_strategy,
            query_graph_logger,
        )?
    } else {
        let ranking_rules =
            get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
@@ -422,17 +545,19 @@ pub fn execute_search(
            &universe,
            from,
            length,
            scoring_strategy,
            placeholder_search_logger,
        )?
    };

    let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;
    let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
    let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;

    // The candidates is the universe unless the exhaustive number of hits
    // is requested and a distinct attribute is set.
    if exhaustive_number_hits {
        if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
            if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
            if let Some(distinct_fid) = fields_ids_map.id(f) {
                all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
            }
        }
@@ -440,6 +565,7 @@ pub fn execute_search(

    Ok(PartialSearchResult {
        candidates: all_candidates,
        document_scores: scores,
        documents_ids: docids,
        located_query_terms,
    })
@@ -491,4 +617,5 @@ pub struct PartialSearchResult {
    pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
    pub candidates: RoaringBitmap,
    pub documents_ids: Vec<DocumentId>,
    pub document_scores: Vec<Vec<ScoreDetails>>,
}
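[Editor's note — illustrative sketch, not part of the diff] With `restricted_fids` set, `get_db_word_docids` in the db_cache hunks above unions the per-field postings (`word_fid_docids`) instead of reading the global `word_docids` entry. A minimal sketch of that union; `postings_for_fid` is a hypothetical stand-in for the LMDB lookup:

use roaring::RoaringBitmap;

fn word_docids_restricted(
    restricted_fids: &[u16],
    postings_for_fid: impl Fn(u16) -> Option<RoaringBitmap>,
) -> Option<RoaringBitmap> {
    let mut merged = RoaringBitmap::new();
    let mut found = false;
    for fid in restricted_fids {
        if let Some(bitmap) = postings_for_fid(*fid) {
            merged |= bitmap; // union of the postings of every allowed field
            found = true;
        }
    }
    found.then_some(merged)
}

fn main() {
    let postings = |fid: u16| match fid {
        0 => Some(RoaringBitmap::from_iter([1u32, 2])),
        1 => Some(RoaringBitmap::from_iter([2u32, 3])),
        _ => None,
    };
    let docids = word_docids_restricted(&[0, 1], postings).unwrap();
    assert_eq!(docids.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
}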
@@ -342,6 +342,22 @@ impl QueryGraph {
        }
        res
    }

    /// Number of words in the phrases in this query graph
    pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize {
        let mut word_count = 0;
        for (_, node) in self.nodes.iter() {
            match &node.data {
                QueryNodeData::Term(term) => {
                    let Some(phrase) = term.term_subset.original_phrase(ctx) else { continue };
                    let phrase = ctx.phrase_interner.get(phrase);
                    word_count += phrase.words.iter().copied().filter(|a| a.is_some()).count()
                }
                _ => continue,
            }
        }
        word_count
    }
}

fn add_node(nodes_data: &mut Vec<QueryNodeData>, node_data: QueryNodeData) -> u16 {
@@ -175,9 +175,7 @@ impl QueryTermSubset {

     pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
         let original = ctx.term_interner.get(self.original);
-        let Some(use_prefix_db) = original.zero_typo.use_prefix_db else {
-            return None
-        };
+        let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
         let word = match &self.zero_typo_subset {
             NTypoTermSubset::All => Some(use_prefix_db),
             NTypoTermSubset::Subset { words, phrases: _ } => {
@@ -261,13 +259,15 @@ impl QueryTermSubset {

         match &self.one_typo_subset {
             NTypoTermSubset::All => {
-                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
+                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
+                else {
                     panic!()
                 };
                 result.extend(one_typo.iter().copied().map(Word::Derived))
             }
             NTypoTermSubset::Subset { words, phrases: _ } => {
-                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
+                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
+                else {
                     panic!()
                 };
                 result.extend(one_typo.intersection(words).copied().map(Word::Derived));
@@ -277,15 +277,11 @@ impl QueryTermSubset {

         match &self.two_typo_subset {
             NTypoTermSubset::All => {
-                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
-                    panic!()
-                };
+                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                 result.extend(two_typos.iter().copied().map(Word::Derived));
             }
             NTypoTermSubset::Subset { words, phrases: _ } => {
-                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
-                    panic!()
-                };
+                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
                 result.extend(two_typos.intersection(words).copied().map(Word::Derived));
             }
             NTypoTermSubset::Nothing => {}
@@ -308,13 +304,15 @@ impl QueryTermSubset {

         match &self.one_typo_subset {
             NTypoTermSubset::All => {
-                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo else {
+                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
+                else {
                     panic!();
                 };
                 result.extend(split_words.iter().copied());
             }
             NTypoTermSubset::Subset { phrases, .. } => {
-                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo else {
+                let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
+                else {
                     panic!();
                 };
                 if let Some(split_words) = split_words {

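The four `QueryTermSubset` hunks above change no behaviour; they are rustfmt's newer layout for `let ... else`, which moves the `else` onto its own line once the binding overflows the line width, and conversely collapses short bindings onto one line. Both spellings below are equivalent:

fn first_even(xs: &[i32]) -> Option<i32> {
    // Short form: `else` stays on the binding line.
    let Some(&x) = xs.iter().find(|&&x| x % 2 == 0) else { return None };
    Some(x)
}

fn first_even_long(xs: &[i32]) -> Option<i32> {
    // Long form: when the binding is too wide, rustfmt puts `else`
    // on its own line, as in the hunks above.
    let Some(&x) = xs.iter().find(|&&x| x % 2 == 0)
    else {
        return None;
    };
    Some(x)
}

fn main() {
    assert_eq!(first_even(&[1, 2, 3]), Some(2));
    assert_eq!(first_even_long(&[1, 3]), None);
}
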
@@ -7,7 +7,7 @@ use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 /// Convert the tokenised search query into a list of located query terms.
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext,
-    query: NormalizedTokenIter<&[u8]>,
+    query: NormalizedTokenIter,
     words_limit: Option<usize>,
 ) -> Result<Vec<LocatedQueryTerm>> {
     let nbr_typos = number_of_typos_allowed(ctx)?;
@@ -303,7 +303,8 @@ mod tests {

     #[test]
     fn start_with_hard_separator() -> Result<()> {
-        let tokenizer = TokenizerBuilder::new().build();
+        let mut builder = TokenizerBuilder::default();
+        let tokenizer = builder.build();
         let tokens = tokenizer.tokenize(".");
         let index = temp_index_with_documents();
         let rtxn = index.read_txn()?;

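The `NormalizedTokenIter<&[u8]>` to `NormalizedTokenIter` change appears to track a tokenizer (charabia) API update in which the iterator type lost its byte-slice parameter; the test correspondingly keeps the builder in a named `mut` binding because `build` now borrows the builder rather than consuming it, so a temporary would be dropped too early. A toy model of that borrow pattern with illustrative types (not charabia's):

// Illustrative builder whose `build` borrows `self`; the builder must
// outlive the tokenizer, hence the named `mut builder` binding.
struct TokenizerBuilder {
    lowercase: bool,
}

struct Tokenizer<'a> {
    config: &'a TokenizerBuilder,
}

impl TokenizerBuilder {
    fn default() -> Self {
        TokenizerBuilder { lowercase: true }
    }
    fn build(&mut self) -> Tokenizer<'_> {
        Tokenizer { config: self }
    }
}

impl<'a> Tokenizer<'a> {
    fn tokenize(&self, s: &str) -> Vec<String> {
        s.split_whitespace()
            .map(|w| if self.config.lowercase { w.to_lowercase() } else { w.to_owned() })
            .collect()
    }
}

fn main() {
    // `let tokenizer = TokenizerBuilder::default().build();` would not
    // compile here: the temporary builder is dropped at the end of the
    // statement while `tokenizer` still borrows it.
    let mut builder = TokenizerBuilder::default();
    let tokenizer = builder.build();
    assert_eq!(tokenizer.tokenize("A b"), vec!["a", "b"]);
}
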
@@ -49,10 +49,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
                 if let Some((cost_of_ignoring, forbidden_nodes)) =
                     cost_of_ignoring_node.get(dest_idx)
                 {
+                    let dest = graph_nodes.get(dest_idx);
+                    let dest_size = match &dest.data {
+                        QueryNodeData::Term(term) => term.term_ids.len(),
+                        _ => panic!(),
+                    };
                     let new_edge_id = edges_store.insert(Some(Edge {
                         source_node: source_id,
                         dest_node: dest_idx,
-                        cost: *cost_of_ignoring,
+                        cost: *cost_of_ignoring * dest_size as u32,
                         condition: None,
                         nodes_to_skip: forbidden_nodes.clone(),
                     }));

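The fix above scales the cost of the edge that skips a node by the number of term ids the node covers, so ignoring an n-gram is as expensive as ignoring the n words it stands for. A toy version of just the arithmetic (`skip_edge_cost` is a hypothetical helper, not a milli function):

// Hypothetical helper: cost of the edge that skips a query node entirely.
// `base_cost` is what skipping a single word costs; `term_ids` is how many
// original words the node spans (1 for a word, 2 for a 2-gram, ...).
fn skip_edge_cost(base_cost: u32, term_ids: usize) -> u32 {
    base_cost * term_ids as u32
}

fn main() {
    let base = 1;
    assert_eq!(skip_edge_cost(base, 1), 1); // skipping "quick"
    assert_eq!(skip_edge_cost(base, 2), 2); // skipping the 2-gram "quickbrown"
}
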
@@ -1,6 +1,7 @@
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -84,4 +85,8 @@ impl RankingRuleGraphTrait for ExactnessGraph {

         Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Exactness(rank)
+    }
 }

@@ -2,6 +2,7 @@ use fxhash::FxHashSet;
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
@@ -68,13 +69,42 @@ impl RankingRuleGraphTrait for FidGraph {
         }

         let mut edges = vec![];
-        for fid in all_fields {
+        for fid in all_fields.iter().copied() {
             edges.push((
                 fid as u32 * term.term_ids.len() as u32,
                 conditions_interner.insert(FidCondition { term: term.clone(), fid }),
             ));
         }

+        // always lookup the max_fid if we don't already and add an artificial condition for max scoring
+        let max_fid: Option<u16> = {
+            if let Some(max_fid) = ctx
+                .index
+                .searchable_fields_ids(ctx.txn)?
+                .map(|field_ids| field_ids.into_iter().max())
+            {
+                max_fid
+            } else {
+                ctx.index.fields_ids_map(ctx.txn)?.ids().max()
+            }
+        };
+
+        if let Some(max_fid) = max_fid {
+            if !all_fields.contains(&max_fid) {
+                edges.push((
+                    max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
+                    conditions_interner.insert(FidCondition {
+                        term: term.clone(), // TODO remove this ugly clone
+                        fid: max_fid,
+                    }),
+                ));
+            }
+        }
+
         Ok(edges)
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Fid(rank)
+    }
 }

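Two things happen in the `FidGraph` hunk: matched fields keep their `fid * term_count` edge cost, and an extra edge is pushed for the maximum field id even when the term never matches there, so the rule always has a known worst cost (which is what bounds the rank when it is later turned into a score). A sketch of the resulting edge list under those assumptions (`fid_edges` is an illustrative name):

// Sketch (assumed shapes): build (cost, fid) edges for one term, adding an
// artificial edge at the max fid when the term never appears in that field.
fn fid_edges(matched_fids: &[u16], max_fid: u16, term_count: u32) -> Vec<(u32, u16)> {
    let mut edges: Vec<(u32, u16)> = matched_fids
        .iter()
        .map(|&fid| (fid as u32 * term_count, fid))
        .collect();
    if !matched_fids.contains(&max_fid) {
        // Guarantees the maximum cost exists, which bounds the rank range.
        edges.push((max_fid as u32 * term_count, max_fid));
    }
    edges
}

fn main() {
    // Term found in fields 0 and 2; the index's last searchable field is 5.
    assert_eq!(fid_edges(&[0, 2], 5, 1), vec![(0, 0), (2, 2), (5, 5)]);
}
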
@@ -41,6 +41,7 @@ use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner
 use super::query_term::LocatedQueryTermSubset;
 use super::small_bitmap::SmallBitmap;
 use super::{QueryGraph, QueryNode, SearchContext};
+use crate::score_details::{Rank, ScoreDetails};
 use crate::Result;

 pub struct ComputedCondition {
@@ -110,6 +111,9 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
     source_node: Option<&LocatedQueryTermSubset>,
     dest_node: &LocatedQueryTermSubset,
 ) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
+
+    /// Convert the rank of a path to its corresponding score for the ranking rule
+    fn rank_to_score(rank: Rank) -> ScoreDetails;
 }

 /// The graph used by graph-based ranking rules.

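`rank_to_score` is the one new requirement this prototype places on every graph-based ranking rule: given the `Rank` a bucket reached, wrap it in that rule's `ScoreDetails` variant (the five impls in this diff are all one-liners of the same shape). A minimal sketch of the idea with stand-in types, assuming a rank carries its maximum so it can be normalized later:

// Stand-ins for crate::score_details::Rank and ScoreDetails; the
// rank/max_rank normalization shown here is an assumption for illustration.
#[derive(Clone, Copy)]
struct Rank {
    rank: u32,
    max_rank: u32,
}

enum ScoreDetails {
    Proximity(Rank),
}

impl ScoreDetails {
    fn global_score(&self) -> f64 {
        match self {
            ScoreDetails::Proximity(r) => r.rank as f64 / r.max_rank as f64,
        }
    }
}

fn rank_to_score(rank: Rank) -> ScoreDetails {
    ScoreDetails::Proximity(rank)
}

fn main() {
    let score = rank_to_score(Rank { rank: 6, max_rank: 8 });
    assert_eq!(score.global_score(), 0.75);
}
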
@@ -2,6 +2,7 @@ use fxhash::{FxHashMap, FxHashSet};
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
@@ -77,6 +78,8 @@ impl RankingRuleGraphTrait for PositionGraph {
         let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();

         for position in all_positions {
+            // FIXME: bucketed position???
+            let distance = position.abs_diff(*term.positions.start());
             let cost = {
                 let mut cost = 0;
                 for i in 0..term.term_ids.len() {
@@ -84,15 +87,17 @@ impl RankingRuleGraphTrait for PositionGraph {
                     // Because if two words are in the same bucketed position (e.g. 32) and consecutive,
                     // then their position cost will be 32+32=64, but an ngram of these two words at the
                     // same position will have a cost of 32+32+1=65
-                    cost += cost_from_position(position as u32 + i as u32);
+                    cost += cost_from_distance(distance as u32 + i as u32);
                 }
                 cost
             };
             positions_for_costs.entry(cost).or_default().push(position);
         }

-        let mut edges = vec![];
+        let max_cost = term.term_ids.len() as u32 * 10;
+        let max_cost_exists = positions_for_costs.contains_key(&max_cost);

+        let mut edges = vec![];
         for (cost, positions) in positions_for_costs {
             edges.push((
                 cost,
@@ -100,12 +105,25 @@ impl RankingRuleGraphTrait for PositionGraph {
             ));
         }

+        if !max_cost_exists {
+            // artificial empty condition for computing max cost
+            edges.push((
+                max_cost,
+                conditions_interner
+                    .insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
+            ));
+        }
+
         Ok(edges)
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Position(rank)
+    }
 }

-fn cost_from_position(sum_positions: u32) -> u32 {
-    match sum_positions {
+fn cost_from_distance(distance: u32) -> u32 {
+    match distance {
         0 => 0,
         1 => 1,
         2..=4 => 2,

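Renaming `cost_from_position` to `cost_from_distance` matches the new meaning of the argument: the cost is now derived from the absolute distance between a word's position and the start of the term's expected positions, not from the raw position itself. Only the first three match arms are visible in this hunk; the sketch below uses those and an assumed fallback for the truncated remainder:

// The arms 0, 1 and 2..=4 come from the hunk above; the catch-all value
// is a placeholder, since the remaining arms are cut off in this diff.
fn cost_from_distance(distance: u32) -> u32 {
    match distance {
        0 => 0,
        1 => 1,
        2..=4 => 2,
        _ => 3, // assumption: larger distances keep bucketing coarsely
    }
}

fn main() {
    // Words at their expected position are free; drift costs more.
    assert_eq!(cost_from_distance(0), 0);
    assert_eq!(cost_from_distance(3), 2);
}
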
@@ -12,13 +12,13 @@ pub fn build_edges(
     left_term: Option<&LocatedQueryTermSubset>,
     right_term: &LocatedQueryTermSubset,
 ) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
-    let right_ngram_length = right_term.term_ids.len();
+    let right_ngram_max = right_term.term_ids.len().saturating_sub(1);

     let Some(left_term) = left_term else {
         return Ok(vec![(
-            (right_ngram_length - 1) as u32,
+            right_ngram_max as u32,
             conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
-        )])
+        )]);
     };

     if left_term.positions.end() + 1 != *right_term.positions.start() {
@@ -29,25 +29,25 @@ pub fn build_edges(
         // The remaining query graph represents `the sun .. are beautiful`
         // but `sun` and `are` have no proximity condition between them
         return Ok(vec![(
-            (right_ngram_length - 1) as u32,
+            right_ngram_max as u32,
             conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
         )]);
     }

     let mut conditions = vec![];
-    for cost in right_ngram_length..(7 + right_ngram_length) {
+    for cost in right_ngram_max..(7 + right_ngram_max) {
         conditions.push((
             cost as u32,
             conditions_interner.insert(ProximityCondition::Uninit {
                 left_term: left_term.clone(),
                 right_term: right_term.clone(),
-                cost: cost as u8,
+                cost: (cost + 1) as u8,
             }),
         ))
     }

     conditions.push((
-        (7 + right_ngram_length) as u32,
+        (7 + right_ngram_max) as u32,
         conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
     ));

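With `right_ngram_max = len - 1`, the proximity edges for a term pair now span costs `right_ngram_max..(7 + right_ngram_max)`, the proximity stored in each condition is `cost + 1`, and the final edge at `7 + right_ngram_max` is the unconditioned "no proximity" fallback. A sketch of just that arithmetic, enumerating the window for a plain word versus a 2-gram (shapes inferred from the hunk above):

// Returns the (edge cost, stored proximity) pairs produced by the loop in
// build_edges, plus the cost of the trailing unconditioned edge.
fn proximity_costs(right_term_ids: usize) -> (Vec<(u32, u8)>, u32) {
    let right_ngram_max = right_term_ids.saturating_sub(1);
    let pairs = (right_ngram_max..(7 + right_ngram_max))
        .map(|cost| (cost as u32, (cost + 1) as u8))
        .collect();
    (pairs, (7 + right_ngram_max) as u32)
}

fn main() {
    let (pairs, last) = proximity_costs(1); // plain word on the right
    assert_eq!(pairs.first(), Some(&(0, 1))); // cost 0 now means proximity 1
    assert_eq!(last, 7);

    let (pairs, last) = proximity_costs(2); // 2-gram on the right
    assert_eq!(pairs.first(), Some(&(1, 2)));
    assert_eq!(last, 8);
}
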
@@ -4,6 +4,7 @@ pub mod compute_docids;
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::SearchContext;
@@ -36,4 +37,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
     ) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
         build::build_edges(ctx, conditions_interner, source_term, dest_term)
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Proximity(rank)
+    }
 }

@@ -1,6 +1,7 @@
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{self, Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -75,4 +76,8 @@ impl RankingRuleGraphTrait for TypoGraph {
         }
         Ok(edges)
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Typo(score_details::Typo::from_rank(rank))
+    }
 }

@@ -1,6 +1,7 @@
 use roaring::RoaringBitmap;

 use super::{ComputedCondition, RankingRuleGraphTrait};
+use crate::score_details::{self, Rank, ScoreDetails};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -41,9 +42,10 @@ impl RankingRuleGraphTrait for WordsGraph {
     _from: Option<&LocatedQueryTermSubset>,
     to_term: &LocatedQueryTermSubset,
 ) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
-        Ok(vec![(
-            to_term.term_ids.len() as u32,
-            conditions_interner.insert(WordsCondition { term: to_term.clone() }),
-        )])
+        Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
     }
+
+    fn rank_to_score(rank: Rank) -> ScoreDetails {
+        ScoreDetails::Words(score_details::Words::from_rank(rank))
+    }
 }

@@ -2,6 +2,7 @@ use roaring::RoaringBitmap;

 use super::logger::SearchLogger;
 use super::{QueryGraph, SearchContext};
+use crate::score_details::ScoreDetails;
 use crate::Result;

 /// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -66,4 +67,6 @@ pub struct RankingRuleOutput<Q> {
     pub query: Q,
     /// The allowed candidates for the child ranking rule
     pub candidates: RoaringBitmap,
+    /// The score for the candidates of the current bucket
+    pub score: ScoreDetails,
 }

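Adding `score` to `RankingRuleOutput` is the piece that lets the bucket sort attach one `ScoreDetails` to every candidate in a bucket as buckets stream out. A toy accumulation loop under that assumption (stand-in types, not milli's bucket_sort):

// Stand-ins: each bucket yields candidate doc ids plus one score that
// applies to all of them; the caller grows a per-document score list.
use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq)]
enum ScoreDetails {
    Words(u32),
}

struct RankingRuleOutput {
    candidates: Vec<u32>,
    score: ScoreDetails,
}

fn main() {
    let buckets = vec![
        RankingRuleOutput { candidates: vec![2, 6], score: ScoreDetails::Words(9) },
        RankingRuleOutput { candidates: vec![5], score: ScoreDetails::Words(8) },
    ];

    let mut per_doc: HashMap<u32, Vec<ScoreDetails>> = HashMap::new();
    for bucket in buckets {
        for doc in bucket.candidates {
            per_doc.entry(doc).or_default().push(bucket.score.clone());
        }
    }
    assert_eq!(per_doc[&5], vec![ScoreDetails::Words(8)]);
}
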
@@ -1,9 +1,11 @@
+use heed::BytesDecode;
 use roaring::RoaringBitmap;

 use super::logger::SearchLogger;
 use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
-use crate::heed_codec::facet::FacetGroupKeyCodec;
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
+use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
+use crate::score_details::{self, ScoreDetails};
 use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
 use crate::{FieldId, Index, Result};

@@ -49,6 +51,7 @@ pub struct Sort<'ctx, Query> {
     is_ascending: bool,
     original_query: Option<Query>,
     iter: Option<RankingRuleOutputIterWrapper<'ctx, Query>>,
+    must_redact: bool,
 }
 impl<'ctx, Query> Sort<'ctx, Query> {
     pub fn new(
@@ -59,15 +62,31 @@ impl<'ctx, Query> Sort<'ctx, Query> {
     ) -> Result<Self> {
         let fields_ids_map = index.fields_ids_map(rtxn)?;
         let field_id = fields_ids_map.id(&field_name);
+        let must_redact = Self::must_redact(index, rtxn, &field_name)?;

-        Ok(Self { field_name, field_id, is_ascending, original_query: None, iter: None })
+        Ok(Self {
+            field_name,
+            field_id,
+            is_ascending,
+            original_query: None,
+            iter: None,
+            must_redact,
+        })
+    }
+
+    fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn, field_name: &str) -> Result<bool> {
+        let Some(displayed_fields) = index.displayed_fields(rtxn)? else {
+            return Ok(false);
+        };
+
+        Ok(!displayed_fields.iter().any(|&field| field == field_name))
     }
 }

 impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
     fn id(&self) -> String {
         let Self { field_name, is_ascending, .. } = self;
-        format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
+        format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
     }
     fn start_iteration(
         &mut self,
@@ -118,12 +137,45 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,

             (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
         };
+        let number_iter = number_iter.map(|r| -> Result<_> {
+            let (docids, bytes) = r?;
+            Ok((
+                docids,
+                serde_json::Value::Number(
+                    serde_json::Number::from_f64(
+                        OrderedF64Codec::bytes_decode(bytes).expect("some number"),
+                    )
+                    .expect("too big float"),
+                ),
+            ))
+        });
+        let string_iter = string_iter.map(|r| -> Result<_> {
+            let (docids, bytes) = r?;
+            Ok((
+                docids,
+                serde_json::Value::String(
+                    StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(),
+                ),
+            ))
+        });

         let query_graph = parent_query.clone();
+        let ascending = self.is_ascending;
+        let field_name = self.field_name.clone();
+        let must_redact = self.must_redact;
         RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
             move |r| {
-                let (docids, _) = r?;
-                Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids })
+                let (docids, value) = r?;
+                Ok(RankingRuleOutput {
+                    query: query_graph.clone(),
+                    candidates: docids,
+                    score: ScoreDetails::Sort(score_details::Sort {
+                        field_name: field_name.clone(),
+                        ascending,
+                        redacted: must_redact,
+                        value,
+                    }),
+                })
             },
         )))
     }
@@ -146,7 +198,16 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
             Ok(Some(bucket))
         } else {
             let query = self.original_query.as_ref().unwrap().clone();
-            Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
+            Ok(Some(RankingRuleOutput {
+                query,
+                candidates: universe.clone(),
+                score: ScoreDetails::Sort(score_details::Sort {
+                    field_name: self.field_name.clone(),
+                    ascending: self.is_ascending,
+                    redacted: self.must_redact,
+                    value: serde_json::Value::Null,
+                }),
+            }))
         }
     }

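The sort rule now records the facet value each bucket was sorted on inside its `ScoreDetails`, decoding numbers through `OrderedF64Codec` and strings through `StrRefCodec`, and `must_redact` is set when the field is not among the displayed fields, so the value can be hidden rather than leak a non-displayed attribute. A small sketch of that redaction decision (the rendering below is a hypothesized consumer, not milli's):

// Hypothetical rendering of a sort score detail: the decoded value is
// replaced by a placeholder when the sorted field is not displayed.
struct SortDetail {
    field_name: String,
    ascending: bool,
    redacted: bool,
    value: serde_json::Value,
}

fn rendered_value(d: &SortDetail) -> serde_json::Value {
    if d.redacted {
        serde_json::Value::String("<hidden>".to_owned())
    } else {
        d.value.clone()
    }
}

fn main() {
    let d = SortDetail {
        field_name: "price".to_owned(),
        ascending: true,
        redacted: true,
        value: serde_json::json!(12.5),
    };
    assert_eq!(rendered_value(&d), serde_json::json!("<hidden>"));
}
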
@@ -122,8 +122,11 @@ fn test_attribute_fid_simple() {
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lazy dog");
-    let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
+    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
+    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+    let document_ids_scores: Vec<_> =
+        documents_ids.iter().zip(document_scores.into_iter()).collect();
+    insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
 }

 #[test]
@@ -135,6 +138,11 @@ fn test_attribute_fid_ngrams() {
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lazy dog");
-    let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
+    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
+
+    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+
+    let document_ids_scores: Vec<_> =
+        documents_ids.iter().zip(document_scores.into_iter()).collect();
+    insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
 }

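The updated tests show the calling pattern the feature expects: opt in with a detailed scoring strategy, then zip ids and scores before snapshotting. Excerpted from the tests above, so `txn` and `index` are assumed to come from the surrounding test fixture:

// Usage, following the diff above: request detailed scores explicitly,
// then pair each returned document id with its score details.
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();

// One Vec<ScoreDetails> per returned document, in ranking order.
let document_ids_scores: Vec<_> =
    documents_ids.iter().zip(document_scores.into_iter()).collect();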