Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-26 15:59:10 +00:00)

Compare commits: v0.14.1 ... prototype- (1495 commits)
@@ -1,5 +1,4 @@
target
Dockerfile
.dockerignore
.git
.gitignore
.github/ISSUE_TEMPLATE/bug_report.md (vendored) - 14 changed lines

@@ -23,16 +23,8 @@ A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]
**MeiliSearch version:** [e.g. v0.20.0]

**Additional context**
Add any other context about the problem here.
Additional information that may be relevant to the issue.
[e.g. architecture, device, OS, browser]
.github/ISSUE_TEMPLATE/config.yml (vendored, new file) - 10 lines

@@ -0,0 +1,10 @@
contact_links:
- name: Feature request
url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal
about: The feature requests are not managed in this repository, please open a discussion in our dedicated product repository
- name: Documentation issue
url: https://github.com/meilisearch/documentation/issues/new
about: For documentation issues, open an issue or a PR in the documentation repository
- name: Support questions & other
url: https://github.com/meilisearch/MeiliSearch/discussions/new
about: For any other question, open a discussion in this repository
.github/ISSUE_TEMPLATE/feature_request.md (vendored, removed) - 20 lines

@@ -1,20 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
.github/ISSUE_TEMPLATE/tracking-issue.md (vendored, removed) - 40 lines

@@ -1,40 +0,0 @@
---
name: Tracking issue
about: Template for a tracking issue
title: ''
labels: tracking-issue
assignees: ''

---

# Summary

One paragraph to explain the feature.

# Motivations

Why are we doing this? What use cases does it support? What is the expected outcome?

# Explanation

Explain the proposal like it was the final documentation of this proposal.

- What is changing for end-users.
- How it works.
- What is breaking?
- Examples.

# Implementation

Explain the technical specificities that will need to be known or done in order to implement this proposal.

## Steps

Describe each step to create the feature with it's associated issue/PR.

# Related

- [ ] Validated by the team (@people needed)
- [ ] Test added
- [ ] [Documentation](https://github.com/meilisearch/documentation/issues/#xxx) //Change xxx or remove the line
- [ ] [SDK/Integrations](https://github.com/meilisearch/integration-guides/issues/#xxx) //Change xxx or remove the line
.github/workflows/check-updated-changelog.yml (vendored, removed) - 16 lines

@@ -1,16 +0,0 @@
name: Check if the CHANGELOG.md has been updated

on: [pull_request]

jobs:
check:
name: Test on ${{ matrix.os }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ignore-changelog') }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Checking the CHANGELOG.md has been updated in this PR
run: |
set -e
git fetch origin ${{ github.base_ref }}
git diff --name-only origin/${{ github.base_ref }} | grep -q CHANGELOG.md
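The removed workflow boiled down to two git commands, so the same gate can still be run by hand before opening a PR. A minimal local sketch, assuming `main` is the base branch being targeted (the workflow read it from `${{ github.base_ref }}` instead):

```bash
#!/usr/bin/env bash
set -e

# Fetch the base branch the PR targets (the CI job used github.base_ref here).
git fetch origin main

# Succeed only if CHANGELOG.md is among the files touched by this branch.
git diff --name-only origin/main | grep -q CHANGELOG.md && echo "CHANGELOG.md updated"
```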
.github/workflows/coverage.yml (vendored, new file) - 33 lines

@@ -0,0 +1,33 @@
---
on:
workflow_dispatch:

name: Execute code coverage

jobs:
nightly-coverage:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- uses: actions-rs/cargo@v1
with:
command: clean
- uses: actions-rs/cargo@v1
with:
command: test
args: --all-features --no-fail-fast
env:
CARGO_INCREMENTAL: "0"
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
- uses: actions-rs/grcov@v0.1
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ${{ steps.coverage.outputs.report }}
yml: ./codecov.yml
fail_ci_if_error: true
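The key ingredients of this job are a nightly toolchain, the `-Zprofile` flags, and grcov. A rough local sketch under the same environment; note that the workflow delegates report generation to the actions-rs/grcov action, so the `grcov` flags below are a common manual invocation, not taken from this file:

```bash
#!/usr/bin/env bash
# Roughly what the nightly-coverage job does, run on a workstation.
rustup toolchain install nightly

export CARGO_INCREMENTAL="0"
export RUSTFLAGS="-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"

cargo +nightly clean
cargo +nightly test --all-features --no-fail-fast

# Collect the profiling data into an lcov report (assumed grcov usage).
cargo install grcov
grcov . -s . --binary-path ./target/debug/ -t lcov -o lcov.info
```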
.github/workflows/flaky.yml (vendored, new file) - 15 lines

@@ -0,0 +1,15 @@
name: Look for flaky tests
on:
schedule:
- cron: "0 12 * * FRI" # every friday at 12:00PM

jobs:
flaky:
runs-on: ubuntu-18.04

steps:
- uses: actions/checkout@v2
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky 100 times
run: cargo flaky -i 100 --release
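Since the job is only two cargo commands, the same flaky-test hunt can be reproduced outside CI; a sketch using exactly the commands listed above:

```bash
#!/usr/bin/env bash
# Install the helper and run the test suite 100 times in release mode,
# so tests that only fail intermittently have a chance to show up.
cargo install cargo-flaky
cargo flaky -i 100 --release
```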
.github/workflows/publish-binaries.yml (vendored) - 41 changed lines

@@ -9,10 +9,11 @@ jobs:
name: Publish for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
os: [ubuntu-18.04, macos-latest, windows-latest]
include:
- os: ubuntu-latest
- os: ubuntu-18.04
artifact_name: meilisearch
asset_name: meilisearch-linux-amd64
- os: macos-latest

@@ -26,7 +27,7 @@ jobs:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- uses: actions/checkout@v1
- uses: actions/checkout@v2
- name: Build
run: cargo build --release --locked
- name: Upload binaries to release

@@ -37,40 +38,18 @@ jobs:
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}

publish-armv7:
name: Publish for ARMv7
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v1.0.0
- uses: uraimo/run-on-arch-action@v1.0.7
id: runcmd
with:
architecture: armv7
distribution: ubuntu18.04
run: |
apt update
apt install -y curl gcc make
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
source $HOME/.cargo/env
cargo build --release --locked
- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/release/meilisearch
asset_name: meilisearch-linux-armv7
tag: ${{ github.ref }}

publish-armv8:
name: Publish for ARMv8
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v1.0.0
- uses: uraimo/run-on-arch-action@v1.0.7
- uses: actions/checkout@v2
- uses: uraimo/run-on-arch-action@v2.1.1
id: runcmd
with:
architecture: aarch64 # aka ARMv8
distribution: ubuntu18.04
arch: aarch64 # aka ARMv8
distro: ubuntu18.04
env: |
JEMALLOC_SYS_WITH_LG_PAGE: 16
run: |
apt update
apt install -y curl gcc make
.github/workflows/publish-crossbuild.yml (vendored, new file) - 76 lines

@@ -0,0 +1,76 @@
name: Publish aarch64 binary

on:
release:
types: [published]

env:
CARGO_TERM_COLOR: always

jobs:
publish-aarch64:
name: Publish to Github
runs-on: ${{ matrix.os }}
continue-on-error: false
strategy:
fail-fast: false
matrix:
include:
- build: aarch64
os: ubuntu-18.04
target: aarch64-unknown-linux-gnu
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64

steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true

- name: APT update
run: |
sudo apt update

- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}

- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
# JEMALLOC_SYS_WITH_LG_PAGE: 16
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
run: |
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
echo RUSTFLAGS="-Clink-arg=-fuse-ld=gold" >> $GITHUB_ENV

- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}

- name: List target output files
run: ls -lR ./target

- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
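The "Configure target aarch64 GNU" step above writes the cross-linker into `~/.cargo/config` and exports two build variables before compiling. A hedged sketch of doing the same by hand on an x86_64 Ubuntu host, using only the packages, paths, and flags that appear in the workflow:

```bash
#!/usr/bin/env bash
# Manual equivalent of the publish-aarch64 job.
sudo apt update
sudo apt-get install -y gcc-aarch64-linux-gnu

rustup target add aarch64-unknown-linux-gnu

# Same cargo configuration the workflow appends to ~/.cargo/config.
cat >> ~/.cargo/config <<'EOF'
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"
EOF

# Same environment the workflow exports through $GITHUB_ENV.
export JEMALLOC_SYS_WITH_LG_PAGE=16
export RUSTFLAGS="-Clink-arg=-fuse-ld=gold"

cargo build --release --target aarch64-unknown-linux-gnu
# The binary ends up in target/aarch64-unknown-linux-gnu/release/meilisearch
```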
.github/workflows/publish-deb-brew-pkg.yml (vendored) - 6 changed lines

@@ -7,14 +7,14 @@ on:
jobs:
debian:
name: Publish debian packagge
runs-on: ubuntu-latest
runs-on: ubuntu-18.04
steps:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v1
- uses: actions/checkout@v2
- name: Build deb package
run: cargo deb -p meilisearch-http -o target/debian/meilisearch.deb
- name: Upload debian pkg to release

@@ -29,7 +29,7 @@ jobs:

homebrew:
name: Bump Homebrew formula
runs-on: ubuntu-latest
runs-on: ubuntu-18.04
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v1
.github/workflows/publish-docker-latest.yml (vendored) - 28 changed lines

@@ -6,17 +6,25 @@ on:
name: Publish latest image to Docker Hub

jobs:
build:
runs-on: ubuntu-latest
docker-latest:
runs-on: docker
steps:
- uses: actions/checkout@v2
- name: Check if current release is latest
run: echo "##[set-output name=is_latest;]$(sh .github/is-latest-release.sh)"
id: release
- name: Publish to Registry
if: steps.release.outputs.is_latest == 'true'
uses: elgohr/Publish-Docker-Github-Action@master
- name: Set up QEMU
uses: docker/setup-qemu-action@v1

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Login to DockerHub
uses: docker/login-action@v1
with:
name: getmeili/meilisearch
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
push: true
platforms: linux/amd64,linux/arm64
tags: getmeili/meilisearch:latest
.github/workflows/publish-docker-tag.yml (vendored) - 33 changed lines

@@ -7,14 +7,33 @@ on:
name: Publish tagged image to Docker Hub

jobs:
build:
runs-on: ubuntu-latest
docker-tag:
runs-on: docker
steps:
- uses: actions/checkout@v1
- name: Publish to Registry
uses: elgohr/Publish-Docker-Github-Action@master
- name: Set up QEMU
uses: docker/setup-qemu-action@v1

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Login to DockerHub
uses: docker/login-action@v1
with:
name: getmeili/meilisearch
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
tag_names: true

- name: Docker meta
id: meta
uses: docker/metadata-action@v3
with:
images: getmeili/meilisearch
flavor: latest=false
tags: type=ref,event=tag

- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
with:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
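Both Docker workflows replace the old single-arch publish action with QEMU plus Buildx so the image is built for linux/amd64 and linux/arm64 in one pass. A hedged command-line sketch of the same build outside CI; the `getmeili/meilisearch:latest` tag is the one used by the latest-image workflow, and pushing assumes you are logged in to Docker Hub with suitable permissions:

```bash
#!/usr/bin/env bash
# Multi-platform build roughly equivalent to the docker/build-push-action step above.
docker login                   # the workflows read these credentials from secrets
docker buildx create --use     # make sure a Buildx builder is active
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --tag getmeili/meilisearch:latest \
  --push \
  .
```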
.github/workflows/rust.yml (vendored, new file) - 72 lines

@@ -0,0 +1,72 @@
name: Rust

on:
workflow_dispatch:
pull_request:
push:
# trying and staging branches are for Bors config
branches:
- trying
- staging

env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1

jobs:
tests:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v2
- name: Cache dependencies
uses: Swatinem/rust-cache@v1.3.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release

clippy:
name: Run Clippy
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v1.3.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings

fmt:
name: Run Rustfmt
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v1.3.0
- name: Run cargo fmt
run: cargo fmt --all -- --check
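The new Rust workflow runs three jobs (tests, clippy, fmt) and each of them reduces to a plain cargo invocation, so the same gate can be run locally before pushing. A sketch using the exact arguments from the jobs above; the fmt job uses a nightly toolchain, hence `+nightly` here:

```bash
#!/usr/bin/env bash
set -e

# tests job: build without default features, then run the test suite.
cargo build --locked --release --no-default-features
cargo test --locked --release

# clippy job: fail on any warning.
cargo clippy --all-targets -- --deny warnings

# fmt job: check formatting with the nightly rustfmt.
cargo +nightly fmt --all -- --check
```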
.github/workflows/test.yml (vendored, removed) - 93 lines

@@ -1,93 +0,0 @@
---
on:
push:
branches:
- release-v*
- trying
- staging
tags:
- 'v[0-9]+.[0-9]+.[0-9]+' # this only concerns tags on stable

name: Test binaries with cargo test

jobs:
check:
name: Test on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: clippy
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy

build-image:
name: Test the build of Docker image
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- run: docker build . --file Dockerfile -t meilisearch
name: Docker build

## A push occurred on a release branch, a prerelease is created and assets are generated
prerelease:
name: create prerelease
needs: [check, build-image]
if: ${{ contains(github.ref, 'release-') && github.event_name == 'push' }}
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Get version number
id: version-number
run: echo "##[set-output name=number;]$(echo ${{ github.ref }} | sed 's/.*\(v.*\)/\1/')"
- name: Get commit count
id: commit-count
run: echo "##[set-output name=count;]$(git rev-list remotes/origin/master..remotes/origin/release-${{ steps.version-number.outputs.number }} --count)"
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.PUBLISH_TOKEN }} # Personal Access Token
with:
tag_name: ${{ steps.version-number.outputs.number }}rc${{ steps.commit-count.outputs.count }}
release_name: Pre-release ${{ steps.version-number.outputs.number }}-rc${{ steps.commit-count.outputs.count }}
prerelease: true

## If a tag is pushed, a release is created for this tag, and assets will be generated
release:
name: create release
needs: [check, build-image]
if: ${{ contains(github.ref, 'tags/v') }}
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Get version number
id: version-number
run: echo "##[set-output name=number;]$(echo ${{ github.ref }} | sed 's/.*\(v.*\)/\1/')"
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.PUBLISH_TOKEN }} # PAT
with:
tag_name: ${{ steps.version-number.outputs.number }}
release_name: Meilisearch ${{ steps.version-number.outputs.number }}
prerelease: false
.gitignore (vendored) - 3 changed lines

@@ -1,8 +1,9 @@
/target
meilisearch-core/target
**/*.csv
**/*.json_lines
**/*.rs.bk
/*.mdb
/query-history.txt
/data.ms
/snapshots
/dumps
CHANGELOG.md (removed) - 78 lines

@@ -1,78 +0,0 @@
## v0.14.1

- Fix version mismatch in snapshot importation (#959)

## v0.14.0

- Fix facet distribution case (#797)
- Snapshotting (#839)
- Fix bucket-sort unwrap bug (#915)

## v0.13.0

- placeholder search (#771)
- Add database version mismatch check (#794)
- Displayed and searchable attributes wildcard (#846)
- Remove sys-info route (#810)
- Check database version mismatch (#794)
- Fix unique docid bug (#841)
- Error codes in updates (#792)
- Sentry disable argument (#813)
- Log analytics if enabled (#825)
- Fix default values displayed on web interface (#874)

## v0.12.0

- Fix long documents not being indexed completely bug (#816)
- Fix distinct attribute returning id instead of name (#800)
- error code rename (#805)

## v0.11.1

- Fix facet cache on document update (#789)
- Improvements on settings consistency (#778)

## v0.11.0

- Change the HTTP framework, moving from tide to actix-web (#601)
- Bump sentry version to 0.18.1 (#690)
- Enable max payload size override (#684)
- Disable sentry in debug (#681)
- Better terminal greeting (#680)
- Fix highlight misalignment (#679)
- Add support for facet count (#676)
- Add support for faceted search (#631)
- Add support for configuring the lmdb map size (#646, #647)
- Add exposed port for Dockerfile (#654)
- Add sentry probe (#664)
- Fix url trailing slash and double slash issues (#659)
- Fix accept all Content-Type by default (#653)
- Return the error message from Serde when a deserialization error is encountered (#661)
- Fix NormalizePath middleware to make the dashboard accessible (#695)
- Update sentry features to remove openssl (#702)
- Add SSL support (#669)
- Rename fieldsFrequency into fieldsDistribution in stats (#719)
- Add support for error code reporting (#703)
- Allow the dashboard to query private servers (#732)
- Add telemetry (#720)
- Add post route for search (#735)

## v0.10.1

- Add support for floating points in filters (#640)
- Add '@' character as tokenizer separator (#607)
- Add support for filtering on arrays of strings (#611)

## v0.10.0

- Refined filtering (#592)
- Add the number of hits in search result (#541)
- Add support for aligned crop in search result (#543)
- Sanitize the content displayed in the web interface (#539)
- Add support of nested null, boolean and seq values (#571 and #568, #574)
- Fixed the core benchmark (#576)
- Publish an ARMv7 and ARMv8 binaries on releases (#540 and #581)
- Fixed a bug where the result of the update status after the first update was empty (#542)
- Fixed a bug where stop words were not handled correctly (#594)
- Fix CORS issues (#602)
- Support wildcard on attributes to retrieve, highlight, and crop (#549, #565, and #598)
80
CONTRIBUTING.md
Normal file
80
CONTRIBUTING.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Contributing
|
||||
|
||||
First, thank you for contributing to MeiliSearch! The goal of this document is to provide everything you need to start contributing to MeiliSearch.
|
||||
|
||||
- [Assumptions](#assumptions)
|
||||
- [How to Contribute](#how-to-contribute)
|
||||
- [Development Workflow](#development-workflow)
|
||||
- [Git Guidelines](#git-guidelines)
|
||||
|
||||
## Assumptions
|
||||
|
||||
1. **You're familiar with [Github](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
|
||||
2. **You've read the MeiliSearch [documentation](https://docs.meilisearch.com).**
|
||||
3. **You know about the [MeiliSearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
|
||||
Please use this for help.**
|
||||
|
||||
## How to Contribute
|
||||
|
||||
1. Ensure your change has an issue! Find an
|
||||
[existing issue](https://github.com/meilisearch/meilisearch/issues/) or [open a new issue](https://github.com/meilisearch/meilisearch/issues/new).
|
||||
* This is where you can get a feel if the change will be accepted or not.
|
||||
2. Once approved, [fork the MeiliSearch repository](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) in your own Github account.
|
||||
3. [Create a new Git branch](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository)
|
||||
4. Review the [Development Workflow](#development-workflow) section that describes the steps to maintain the repository.
|
||||
5. Make your changes on your branch.
|
||||
6. [Submit the branch as a Pull Request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) pointing to the `main` branch of the MeiliSearch repository. A maintainer should comment and/or review your Pull Request within a few days. Although depending on the circumstances, it may take longer.
|
||||
|
||||
## Development Workflow
|
||||
|
||||
### Setup and run MeiliSearch
|
||||
|
||||
```bash
|
||||
cargo run --release
|
||||
```
|
||||
|
||||
We recommend using the `--release` flag to test the full performance of MeiliSearch.
|
||||
|
||||
### Test
|
||||
|
||||
```bash
|
||||
cargo test
|
||||
```
|
||||
|
||||
If you get a "Too many open files" error you might want to increase the open file limit using this command:
|
||||
|
||||
```bash
|
||||
ulimit -Sn 3000
|
||||
```
|
||||
|
||||
## Git Guidelines
|
||||
|
||||
### Git Branches
|
||||
|
||||
All changes must be made in a branch and submitted as PR.
|
||||
|
||||
We do not enforce any branch naming style, but please use something descriptive of your changes.
|
||||
|
||||
### Git Commits
|
||||
|
||||
As minimal requirements, your commit message should:
|
||||
- be capitalized
|
||||
- not finish by a dot or any other punctuation character (!,?)
|
||||
- start with a verb so that we can read your commit message this way: "This commit will ...", where "..." is the commit message.
|
||||
e.g.: "Fix the home page button" or "Add more tests for create_index method"
|
||||
|
||||
We don't follow any other convention, but if you want to use one, we recommend [the Chris Beams one](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
### Github Pull Requests
|
||||
|
||||
Some notes on GitHub PRs:
|
||||
|
||||
- All PRs must be reviewed and approved by at least one maintainer.
- The PR title should be accurate and descriptive of the changes.
- [Convert your PR to a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you mark your PR as ready for review.<br>
  Draft PRs are recommended when you want to show that you are working on something and make your work visible.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
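
If you ever need to bring your branch up to date manually, a plain rebase on `main` works. The commands below are only a sketch and assume the MeiliSearch repository was added as the `upstream` remote of your fork:

```bash
# Assumes the official repository is configured as the `upstream` remote
git fetch upstream
git rebase upstream/main
git push --force-with-lease
```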

<hr>

Thank you again for reading this through. We cannot wait to start working with you now that you have made your way through this contributing guide ❤️

3434  Cargo.lock  generated
File diff suppressed because it is too large

10  Cargo.toml
@@ -1,11 +1,9 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"meilisearch-core",
|
||||
"meilisearch-http",
|
||||
"meilisearch-schema",
|
||||
"meilisearch-tokenizer",
|
||||
"meilisearch-types",
|
||||
"meilisearch-error",
|
||||
"meilisearch-lib",
|
||||
"meilisearch-auth",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
resolver = "2"
|
||||
|
||||
7  Cross.toml  Normal file
@@ -0,0 +1,7 @@
|
||||
[build.env]
|
||||
passthrough = [
|
||||
"RUST_BACKTRACE",
|
||||
"CARGO_TERM_COLOR",
|
||||
"RUSTFLAGS",
|
||||
"JEMALLOC_SYS_WITH_LG_PAGE"
|
||||
]
|
||||
36  Dockerfile
@@ -1,28 +1,48 @@
|
||||
# Compile
|
||||
FROM alpine:3.10 AS compiler
|
||||
FROM alpine:3.14 AS compiler
|
||||
|
||||
RUN apk update --quiet
|
||||
RUN apk add curl
|
||||
RUN apk add build-base
|
||||
RUN apk update --quiet \
|
||||
&& apk add -q --no-cache curl build-base
|
||||
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
|
||||
WORKDIR /meilisearch
|
||||
|
||||
COPY . .
|
||||
COPY Cargo.lock .
|
||||
COPY Cargo.toml .
|
||||
|
||||
COPY meilisearch-auth/Cargo.toml meilisearch-auth/
|
||||
COPY meilisearch-error/Cargo.toml meilisearch-error/
|
||||
COPY meilisearch-http/Cargo.toml meilisearch-http/
|
||||
COPY meilisearch-lib/Cargo.toml meilisearch-lib/
|
||||
|
||||
ENV RUSTFLAGS="-C target-feature=-crt-static"
|
||||
|
||||
# Create dummy main.rs files for each workspace member to be able to compile all the dependencies
|
||||
RUN find . -type d -name "meilisearch-*" | xargs -I{} sh -c 'mkdir {}/src; echo "fn main() { }" > {}/src/main.rs;'
|
||||
# Use `cargo build` instead of `cargo vendor` because we need to not only download but compile dependencies too
|
||||
RUN $HOME/.cargo/bin/cargo build --release
|
||||
# Cleanup dummy main.rs files
|
||||
RUN find . -path "*/src/main.rs" -delete
|
||||
|
||||
ARG COMMIT_SHA
|
||||
ARG COMMIT_DATE
|
||||
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE}
|
||||
|
||||
COPY . .
|
||||
RUN $HOME/.cargo/bin/cargo build --release
|
||||
|
||||
# Run
|
||||
FROM alpine:3.10
|
||||
FROM alpine:3.14
|
||||
|
||||
RUN apk add -q --no-cache libgcc tini
|
||||
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
|
||||
ENV MEILI_SERVER_PROVIDER docker
|
||||
|
||||
RUN apk update --quiet \
|
||||
&& apk add -q --no-cache libgcc tini curl
|
||||
|
||||
COPY --from=compiler /meilisearch/target/release/meilisearch .
|
||||
|
||||
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
|
||||
EXPOSE 7700/tcp
|
||||
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
2  LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2020 Meili SAS
|
||||
Copyright (c) 2019-2021 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
97  README.md
@@ -6,6 +6,7 @@
|
||||
|
||||
<h4 align="center">
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://fr.linkedin.com/company/meilisearch">LinkedIn</a> |
|
||||
<a href="https://twitter.com/meilisearch">Twitter</a> |
|
||||
@@ -16,7 +17,7 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/meilisearch/MeiliSearch/actions"><img src="https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
|
||||
<a href="https://deps.rs/repo/github/meilisearch/MeiliSearch"><img src="https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg" alt="Dependency status"></a>
|
||||
<a href="https://github.com/meilisearch/MeiliSearch/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://github.com/meilisearch/MeiliSearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://slack.meilisearch.com"><img src="https://img.shields.io/badge/slack-MeiliSearch-blue.svg?logo=slack" alt="Slack"></a>
|
||||
<a href="https://github.com/meilisearch/MeiliSearch/discussions" alt="Discussions"><img src="https://img.shields.io/badge/github-discussions-red" /></a>
|
||||
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
@@ -28,16 +29,16 @@
|
||||
For more information about features go to [our documentation](https://docs.meilisearch.com/).
|
||||
|
||||
<p align="center">
|
||||
<img src="assets/movies-web-demo.gif" alt="Web interface gif" />
|
||||
<img src="assets/trumen-fast.gif" alt="Web interface gif" />
|
||||
</p>
|
||||
|
||||
## ✨ Features
|
||||
* Search as-you-type experience (answers < 50 milliseconds)
|
||||
* Search-as-you-type experience (answers < 50 milliseconds)
|
||||
* Full-text search
|
||||
* Typo tolerant (understands typos and miss-spelling)
|
||||
* Typo tolerant (understands typos and misspelling)
|
||||
* Faceted search and filters
|
||||
* Supports Kanji characters
|
||||
* Supports Synonym
|
||||
* Supports hanzi (Chinese characters)
|
||||
* Supports synonyms
|
||||
* Easy to install, deploy, and maintain
|
||||
* Whole documents are returned
|
||||
* Highly customizable
|
||||
@@ -47,7 +48,7 @@ For more information about features go to [our documentation](https://docs.meili
|
||||
|
||||
### Deploy the Server
|
||||
|
||||
#### Brew (Mac OS)
|
||||
#### Homebrew (Mac OS)
|
||||
|
||||
```bash
|
||||
brew update && brew install meilisearch
|
||||
@@ -57,13 +58,27 @@ meilisearch
|
||||
#### Docker
|
||||
|
||||
```bash
|
||||
docker run -p 7700:7700 -v $(pwd)/data.ms:/data.ms getmeili/meilisearch
|
||||
docker run -p 7700:7700 -v "$(pwd)/data.ms:/data.ms" getmeili/meilisearch
|
||||
```
|
||||
|
||||
#### Announcing a cloud-hosted MeiliSearch
|
||||
|
||||
Join the closed beta by filling out this [form](https://meilisearch.typeform.com/to/FtnzvZfh).
|
||||
|
||||
#### Try MeiliSearch in our Sandbox
|
||||
|
||||
Create a MeiliSearch instance in [MeiliSearch Sandbox](https://sandbox.meilisearch.com/). This instance is free, and will be active for 48 hours.
|
||||
|
||||
#### Run on Digital Ocean
|
||||
|
||||
[](https://marketplace.digitalocean.com/apps/meilisearch?action=deploy&refcode=7c67bd97e101)
|
||||
|
||||
#### Deploy on Platform.sh
|
||||
|
||||
<a href="https://console.platform.sh/projects/create-project?template=https://raw.githubusercontent.com/platformsh/template-builder/master/templates/meilisearch/.platform.template.yaml&utm_content=meilisearch&utm_source=github&utm_medium=button&utm_campaign=deploy_on_platform">
|
||||
<img src="https://platform.sh/images/deploy/lg-blue.svg" alt="Deploy on Platform.sh" width="180px" />
|
||||
</a>
|
||||
|
||||
#### APT (Debian & Ubuntu)
|
||||
|
||||
```bash
|
||||
@@ -86,13 +101,6 @@ If you have the latest stable Rust toolchain installed on your local system, clo
|
||||
```bash
|
||||
git clone https://github.com/meilisearch/MeiliSearch.git
|
||||
cd MeiliSearch
|
||||
```
|
||||
|
||||
In the cloned repository, compile MeiliSearch.
|
||||
|
||||
```bash
|
||||
rustup override set stable
|
||||
rustup update stable
|
||||
cargo run --release
|
||||
```
|
||||
|
||||
@@ -104,14 +112,7 @@ Let's create an index! If you need a sample dataset, use [this movie database](h
|
||||
curl -L 'https://bit.ly/2PAcw9l' -o movies.json
|
||||
```
|
||||
|
||||
MeiliSearch can serve multiple indexes, with different kinds of documents.
|
||||
It is required to create an index before sending documents to it.
|
||||
|
||||
```bash
|
||||
curl -i -X POST 'http://127.0.0.1:7700/indexes' --data '{ "name": "Movies", "uid": "movies" }'
|
||||
```
|
||||
|
||||
Now that the server knows about your brand new index, you're ready to send it some data.
|
||||
Now, you're ready to index some data.
|
||||
|
||||
```bash
|
||||
curl -i -X POST 'http://127.0.0.1:7700/indexes/movies/documents' \
|
||||
@@ -138,27 +139,29 @@ curl 'http://127.0.0.1:7700/indexes/movies/search?q=botman+robin&limit=2' | jq
|
||||
"id": "415",
|
||||
"title": "Batman & Robin",
|
||||
"poster": "https://image.tmdb.org/t/p/w1280/79AYCcxw3kSKbhGpx1LiqaCAbwo.jpg",
|
||||
"overview": "Along with crime-fighting partner Robin and new recruit Batgirl...",
|
||||
"release_date": "1997-06-20",
|
||||
"overview": "Along with crime-fighting partner Robin and new recruit Batgirl, Batman battles the dual threat of frosty genius Mr. Freeze and homicidal horticulturalist Poison Ivy. Freeze plans to put Gotham City on ice, while Ivy tries to drive a wedge between the dynamic duo.",
|
||||
"release_date": 866768400
|
||||
},
|
||||
{
|
||||
"id": "411736",
|
||||
"title": "Batman: Return of the Caped Crusaders",
|
||||
"poster": "https://image.tmdb.org/t/p/w1280/GW3IyMW5Xgl0cgCN8wu96IlNpD.jpg",
|
||||
"overview": "Adam West and Burt Ward returns to their iconic roles of Batman and Robin...",
|
||||
"release_date": "2016-10-08",
|
||||
"overview": "Adam West and Burt Ward returns to their iconic roles of Batman and Robin. Featuring the voices of Adam West, Burt Ward, and Julie Newmar, the film sees the superheroes going up against classic villains like The Joker, The Riddler, The Penguin and Catwoman, both in Gotham City… and in space.",
|
||||
"release_date": 1475888400
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"nbHits": 8,
|
||||
"exhaustiveNbHits": false,
|
||||
"query": "botman robin",
|
||||
"limit": 2,
|
||||
"processingTimeMs": 1,
|
||||
"query": "botman robin"
|
||||
"offset": 0,
|
||||
"processingTimeMs": 2
|
||||
}
|
||||
```
|
||||
|
||||
#### Use the Web Interface
|
||||
|
||||
We also deliver an **out-of-the-box web interface** in which you can test MeiliSearch interactively.
|
||||
We also deliver an **out-of-the-box [web interface](https://github.com/meilisearch/mini-dashboard)** in which you can test MeiliSearch interactively.
|
||||
|
||||
You can access the web interface in your web browser at the root of the server. The default URL is [http://127.0.0.1:7700](http://127.0.0.1:7700). All you need to do is open your web browser and enter MeiliSearch’s address to visit it. This will lead you to a web page with a search bar that will allow you to search in the selected index.
|
||||
|
||||
@@ -170,23 +173,33 @@ Now that your MeiliSearch server is up and running, you can learn more about how
|
||||
|
||||
## Contributing
|
||||
|
||||
Hey! We're glad you're thinking about contributing to MeiliSearch! If you think something is missing or could be improved, please open issues and pull requests. If you'd like to help this project grow, we'd love to have you! To start contributing, checking [issues tagged as "good-first-issue"](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) is a good start!
|
||||
Hey! We're glad you're thinking about contributing to MeiliSearch! Feel free to pick an [issue labeled as `good first issue`](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22), and to ask any question you need. Some points might not be clear and we are available to help you!
|
||||
|
||||
Also, we recommend following the [CONTRIBUTING](./CONTRIBUTING.md) to create your PR.
|
||||
|
||||
## Core engine and tokenizer
|
||||
|
||||
The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API.
|
||||
|
||||
Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `tokenizer` library](https://github.com/meilisearch/tokenizer/).
|
||||
## Telemetry
|
||||
|
||||
MeiliSearch collects anonymous data regarding general usage.
|
||||
This helps us better understand developers usage of MeiliSearch features.<br/>
|
||||
To see what information we're retrieving, please see the complete list [on the dedicated issue](https://github.com/meilisearch/MeiliSearch/issues/720).<br/>
|
||||
We also use Sentry to make us crash and error reports. If you want to know more about what Sentry collects, please visit their [privacy policy website](https://sentry.io/privacy/).<br/>
|
||||
This program is optionnal, you can disable these analytics by using the `MEILI_NO_ANALYTICS` env variable.
|
||||
This helps us better understand developers' usage of MeiliSearch features.
|
||||
|
||||
To find out more on what information we're retrieving, please see our documentation on [Telemetry](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html).
|
||||
|
||||
This program is optional, you can disable these analytics by using the `MEILI_NO_ANALYTICS` env variable.
|
||||
|
||||
## Feature request
|
||||
|
||||
The feature requests are not managed in this repository. Please visit our [dedicated repository](https://github.com/meilisearch/product) to see our work about the MeiliSearch product.
|
||||
|
||||
If you have a feature request or any feedback about an existing feature, please open [a discussion](https://github.com/meilisearch/product/discussions).
|
||||
Also, feel free to participate in the current discussions, we are looking forward to reading your comments.
|
||||
|
||||
## 💌 Contact
|
||||
|
||||
Feel free to contact us about any questions you may have:
|
||||
* At [bonjour@meilisearch.com](mailto:bonjour@meilisearch.com)
|
||||
* Via the chat box available on every page of [our documentation](https://docs.meilisearch.com/) and on [our landing page](https://www.meilisearch.com/).
|
||||
* 🆕 Join our [GitHub Discussions forum](https://github.com/meilisearch/MeiliSearch/discussions)
|
||||
* Join our [Slack community](https://slack.meilisearch.com/).
|
||||
* By opening an issue.
|
||||
Please visit [this page](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html#contact-us).
|
||||
|
||||
MeiliSearch is developed by [Meili](https://www.meilisearch.com), a young company. To know more about us, you can [read our blog](https://blog.meilisearch.com). Any suggestion or feedback is highly appreciated. Thank you for your support!
|
||||
|
||||
33  SECURITY.md  Normal file
@@ -0,0 +1,33 @@
|
||||
# Security
|
||||
|
||||
MeiliSearch takes the security of our software products and services seriously.
|
||||
|
||||
If you believe you have found a security vulnerability in any MeiliSearch-owned repository, please report it to us as described below.
|
||||
|
||||
## Supported versions
|
||||
|
||||
As long as we are pre-v1.0, only the latest version of MeiliSearch will be supported with security updates.
|
||||
|
||||
## Reporting security issues
|
||||
|
||||
⚠️ Please do not report security vulnerabilities through public GitHub issues. ⚠️
|
||||
|
||||
Instead, please kindly email us at security@meilisearch.com
|
||||
|
||||
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
|
||||
|
||||
- Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
|
||||
- Full paths of source file(s) related to the manifestation of the issue
|
||||
- The location of the affected source code (tag/branch/commit or direct URL)
|
||||
- Any special configuration required to reproduce the issue
|
||||
- Step-by-step instructions to reproduce the issue
|
||||
- Proof-of-concept or exploit code (if possible)
|
||||
- Impact of the issue, including how an attacker might exploit the issue
|
||||
|
||||
This information will help us triage your report more quickly.
|
||||
|
||||
You will receive a response from us within 72 hours. If the issue is confirmed, we will release a patch as soon as possible depending on complexity.
|
||||
|
||||
## Preferred languages
|
||||
|
||||
We prefer all communications to be in English.
|
||||
Binary file not shown. (Before: 5.3 MiB)

BIN  assets/trumen-fast.gif  Normal file
Binary file not shown. (After: 1.4 MiB)
13  bors.toml
@@ -1,3 +1,10 @@
|
||||
status = ["Test on macos-latest", "Test on ubuntu-latest"]
|
||||
# 4 hours timeout
|
||||
timeout-sec = 14400
|
||||
status = [
|
||||
'Tests on ubuntu-18.04',
|
||||
'Tests on macos-latest',
|
||||
'Tests on windows-latest',
|
||||
'Run Clippy',
|
||||
'Run Rustfmt'
|
||||
]
|
||||
pr_status = ['Milestone Check']
|
||||
# 3 hours timeout
|
||||
timeout-sec = 10800
|
||||
|
||||
38  bump.sh
@@ -1,38 +0,0 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
NEW_VERSION=$1
|
||||
|
||||
if [ -z "$NEW_VERSION" ]
|
||||
then
|
||||
echo "error: a version number must be provided"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# find current version
|
||||
CURRENT_VERSION=$(cat **/*.toml | grep meilisearch | grep version | sed 's/.*\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/' | sed "1q;d")
|
||||
|
||||
# bump all version in .toml
|
||||
echo "bumping from version $CURRENT_VERSION to version $NEW_VERSION"
|
||||
while true
|
||||
do
|
||||
read -r -p "Continue (y/n)?" choice
|
||||
case "$choice" in
|
||||
y|Y ) break;;
|
||||
n|N ) echo "aborting bump" && exit 0;;
|
||||
* ) echo "invalid choice";;
|
||||
esac
|
||||
done
|
||||
# update all crate version
|
||||
sed -i "s/version = \"$CURRENT_VERSION\"/version = \"$NEW_VERSION\"/" **/*.toml
|
||||
|
||||
printf "running cargo check: "
|
||||
|
||||
CARGO_CHECK=$(cargo check 2>&1)
|
||||
|
||||
if [ $? != "0" ]
|
||||
then
|
||||
printf "\033[31;1m FAIL \033[0m\n"
|
||||
printf "$CARGO_CHECK"
|
||||
exit 1
|
||||
fi
|
||||
printf "\033[32;1m OK \033[0m\n"
|
||||
@@ -1 +0,0 @@
|
||||
_datas in movies.csv are from https://www.themoviedb.org/_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"searchableAttributes": ["title", "overview"],
|
||||
"displayedAttributes": [
|
||||
"id",
|
||||
"title",
|
||||
"overview",
|
||||
"release_date",
|
||||
"poster"
|
||||
]
|
||||
}
|
||||
@@ -6,7 +6,6 @@ GREEN='\033[32m'
|
||||
DEFAULT='\033[0m'
|
||||
|
||||
# GLOBALS
|
||||
BINARY_NAME='meilisearch'
|
||||
GREP_SEMVER_REGEXP='v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)$' # i.e. v[number].[number].[number]
|
||||
|
||||
# FUNCTIONS
|
||||
@@ -22,7 +21,7 @@ semverParseInto() {
|
||||
eval $2=`echo $1 | sed -e "s#$RE#\1#"`
|
||||
#MINOR
|
||||
eval $3=`echo $1 | sed -e "s#$RE#\2#"`
|
||||
#MINOR
|
||||
#PATCH
|
||||
eval $4=`echo $1 | sed -e "s#$RE#\3#"`
|
||||
#SPECIAL
|
||||
eval $5=`echo $1 | sed -e "s#$RE#\4#"`
|
||||
@@ -52,13 +51,13 @@ semverLT() {
|
||||
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -le $MINOR_B ] && [ $PATCH_A -lt $PATCH_B ]; then
|
||||
return 0
|
||||
fi
|
||||
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
|
||||
if [ "_$SPECIAL_A" == '_' ] && [ "_$SPECIAL_B" == '_' ] ; then
|
||||
return 1
|
||||
fi
|
||||
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" != "_" ] ; then
|
||||
if [ "_$SPECIAL_A" == '_' ] && [ "_$SPECIAL_B" != '_' ] ; then
|
||||
return 1
|
||||
fi
|
||||
if [ "_$SPECIAL_A" != "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
|
||||
if [ "_$SPECIAL_A" != '_' ] && [ "_$SPECIAL_B" == '_' ] ; then
|
||||
return 0
|
||||
fi
|
||||
if [ "_$SPECIAL_A" < "_$SPECIAL_B" ]; then
|
||||
@@ -68,39 +67,47 @@ semverLT() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# Get a token from https://github.com/settings/tokens to increase the rate limit (from 60 to 5000), make sure the token scope is set to 'public_repo'
|
||||
# Create the GITHUB_PAT environment variable once you have acquired the token to start using it
|
||||
# Returns the tag of the latest stable release (in terms of semver and not of release date)
|
||||
get_latest() {
|
||||
temp_file='temp_file' # temp_file needed because the grep would start before the download is over
|
||||
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file"
|
||||
|
||||
if [ -z "$GITHUB_PAT" ]; then
|
||||
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file" || return 1
|
||||
else
|
||||
curl -H "Authorization: token $GITHUB_PAT" -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file" || return 1
|
||||
fi
|
||||
|
||||
releases=$(cat "$temp_file" | \
|
||||
grep -E "tag_name|draft|prerelease" \
|
||||
grep -E '"tag_name":|"draft":|"prerelease":' \
|
||||
| tr -d ',"' | cut -d ':' -f2 | tr -d ' ')
|
||||
# Returns a list of [tag_name draft_boolean prerelease_boolean ...]
|
||||
# Ex: v0.10.1 false false v0.9.1-rc.1 false true v0.9.0 false false...
|
||||
|
||||
i=0
|
||||
latest=""
|
||||
current_tag=""
|
||||
latest=''
|
||||
current_tag=''
|
||||
for release_info in $releases; do
|
||||
if [ $i -eq 0 ]; then # Cheking tag_name
|
||||
if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release
|
||||
current_tag=$release_info
|
||||
else
|
||||
current_tag=""
|
||||
current_tag=''
|
||||
fi
|
||||
i=1
|
||||
elif [ $i -eq 1 ]; then # Checking draft boolean
|
||||
if [ "$release_info" = "true" ]; then
|
||||
current_tag=""
|
||||
if [ "$release_info" = 'true' ]; then
|
||||
current_tag=''
|
||||
fi
|
||||
i=2
|
||||
elif [ $i -eq 2 ]; then # Checking prerelease boolean
|
||||
if [ "$release_info" = "true" ]; then
|
||||
current_tag=""
|
||||
if [ "$release_info" = 'true' ]; then
|
||||
current_tag=''
|
||||
fi
|
||||
i=0
|
||||
if [ "$current_tag" != "" ]; then # If the current_tag is valid
|
||||
if [ "$latest" = "" ]; then # If there is no latest yet
|
||||
if [ "$current_tag" != '' ]; then # If the current_tag is valid
|
||||
if [ "$latest" = '' ]; then # If there is no latest yet
|
||||
latest="$current_tag"
|
||||
else
|
||||
semverLT $current_tag $latest # Comparing latest and the current tag
|
||||
@@ -127,6 +134,9 @@ get_os() {
|
||||
'Linux')
|
||||
os='linux'
|
||||
;;
|
||||
'MINGW'*)
|
||||
os='windows'
|
||||
;;
|
||||
*)
|
||||
return 1
|
||||
esac
|
||||
@@ -138,7 +148,7 @@ get_os() {
|
||||
get_archi() {
|
||||
architecture=$(uname -m)
|
||||
case "$architecture" in
|
||||
'x86_64' | 'amd64')
|
||||
'x86_64' | 'amd64' | 'arm64')
|
||||
archi='amd64'
|
||||
;;
|
||||
'aarch64')
|
||||
@@ -151,7 +161,7 @@ get_archi() {
|
||||
}
|
||||
|
||||
success_usage() {
|
||||
printf "$GREEN%s\n$DEFAULT" "MeiliSearch binary successfully downloaded as '$BINARY_NAME' file."
|
||||
printf "$GREEN%s\n$DEFAULT" "MeiliSearch $latest binary successfully downloaded as '$binary_name' file."
|
||||
echo ''
|
||||
echo 'Run it:'
|
||||
echo ' $ ./meilisearch'
|
||||
@@ -163,25 +173,43 @@ failure_usage() {
|
||||
printf "$RED%s\n$DEFAULT" 'ERROR: MeiliSearch binary is not available for your OS distribution or your architecture yet.'
|
||||
echo ''
|
||||
echo 'However, you can easily compile the binary from the source files.'
|
||||
echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/guides/advanced_guides/installation.html'
|
||||
echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
|
||||
}
|
||||
|
||||
# MAIN
|
||||
latest="$(get_latest)"
|
||||
get_os
|
||||
if [ "$?" -eq 1 ]; then
|
||||
|
||||
if [ "$latest" = '' ]; then
|
||||
echo ''
|
||||
echo 'Impossible to get the latest stable version of MeiliSearch.'
|
||||
echo 'Please let us know about this issue: https://github.com/meilisearch/MeiliSearch/issues/new/choose'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! get_os; then
|
||||
failure_usage
|
||||
exit 1
|
||||
fi
|
||||
get_archi
|
||||
if [ "$?" -eq 1 ]; then
|
||||
|
||||
if ! get_archi; then
|
||||
failure_usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Downloading MeiliSearch binary $latest for $os, architecture $archi..."
|
||||
release_file="meilisearch-$os-$archi"
|
||||
case "$os" in
|
||||
'windows')
|
||||
release_file="meilisearch-$os-$archi.exe"
|
||||
binary_name='meilisearch.exe'
|
||||
|
||||
;;
|
||||
*)
|
||||
release_file="meilisearch-$os-$archi"
|
||||
binary_name='meilisearch'
|
||||
|
||||
esac
|
||||
link="https://github.com/meilisearch/MeiliSearch/releases/download/$latest/$release_file"
|
||||
curl -OL "$link"
|
||||
mv "$release_file" "$BINARY_NAME"
|
||||
chmod 744 "$BINARY_NAME"
|
||||
mv "$release_file" "$binary_name"
|
||||
chmod 744 "$binary_name"
|
||||
success_usage
|
||||
|
||||
15  meilisearch-auth/Cargo.toml  Normal file
@@ -0,0 +1,15 @@
|
||||
[package]
|
||||
name = "meilisearch-auth"
|
||||
version = "0.25.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
enum-iterator = "0.7.0"
|
||||
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
|
||||
sha2 = "0.9.6"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
serde_json = { version = "1.0.67", features = ["preserve_order"] }
|
||||
rand = "0.8.4"
|
||||
serde = { version = "1.0.130", features = ["derive"] }
|
||||
thiserror = "1.0.28"
|
||||
104  meilisearch-auth/src/action.rs  Normal file
@@ -0,0 +1,104 @@
|
||||
use enum_iterator::IntoEnumIterator;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(IntoEnumIterator, Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||
#[repr(u8)]
|
||||
pub enum Action {
|
||||
#[serde(rename = "*")]
|
||||
All = 0,
|
||||
#[serde(rename = "search")]
|
||||
Search = actions::SEARCH,
|
||||
#[serde(rename = "documents.add")]
|
||||
DocumentsAdd = actions::DOCUMENTS_ADD,
|
||||
#[serde(rename = "documents.get")]
|
||||
DocumentsGet = actions::DOCUMENTS_GET,
|
||||
#[serde(rename = "documents.delete")]
|
||||
DocumentsDelete = actions::DOCUMENTS_DELETE,
|
||||
#[serde(rename = "indexes.create")]
|
||||
IndexesAdd = actions::INDEXES_CREATE,
|
||||
#[serde(rename = "indexes.get")]
|
||||
IndexesGet = actions::INDEXES_GET,
|
||||
#[serde(rename = "indexes.update")]
|
||||
IndexesUpdate = actions::INDEXES_UPDATE,
|
||||
#[serde(rename = "indexes.delete")]
|
||||
IndexesDelete = actions::INDEXES_DELETE,
|
||||
#[serde(rename = "tasks.get")]
|
||||
TasksGet = actions::TASKS_GET,
|
||||
#[serde(rename = "settings.get")]
|
||||
SettingsGet = actions::SETTINGS_GET,
|
||||
#[serde(rename = "settings.update")]
|
||||
SettingsUpdate = actions::SETTINGS_UPDATE,
|
||||
#[serde(rename = "stats.get")]
|
||||
StatsGet = actions::STATS_GET,
|
||||
#[serde(rename = "dumps.create")]
|
||||
DumpsCreate = actions::DUMPS_CREATE,
|
||||
#[serde(rename = "dumps.get")]
|
||||
DumpsGet = actions::DUMPS_GET,
|
||||
#[serde(rename = "version")]
|
||||
Version = actions::VERSION,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
pub fn from_repr(repr: u8) -> Option<Self> {
|
||||
use actions::*;
|
||||
match repr {
|
||||
0 => Some(Self::All),
|
||||
SEARCH => Some(Self::Search),
|
||||
DOCUMENTS_ADD => Some(Self::DocumentsAdd),
|
||||
DOCUMENTS_GET => Some(Self::DocumentsGet),
|
||||
DOCUMENTS_DELETE => Some(Self::DocumentsDelete),
|
||||
INDEXES_CREATE => Some(Self::IndexesAdd),
|
||||
INDEXES_GET => Some(Self::IndexesGet),
|
||||
INDEXES_UPDATE => Some(Self::IndexesUpdate),
|
||||
INDEXES_DELETE => Some(Self::IndexesDelete),
|
||||
TASKS_GET => Some(Self::TasksGet),
|
||||
SETTINGS_GET => Some(Self::SettingsGet),
|
||||
SETTINGS_UPDATE => Some(Self::SettingsUpdate),
|
||||
STATS_GET => Some(Self::StatsGet),
|
||||
DUMPS_CREATE => Some(Self::DumpsCreate),
|
||||
DUMPS_GET => Some(Self::DumpsGet),
|
||||
VERSION => Some(Self::Version),
|
||||
_otherwise => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn repr(&self) -> u8 {
|
||||
use actions::*;
|
||||
match self {
|
||||
Self::All => 0,
|
||||
Self::Search => SEARCH,
|
||||
Self::DocumentsAdd => DOCUMENTS_ADD,
|
||||
Self::DocumentsGet => DOCUMENTS_GET,
|
||||
Self::DocumentsDelete => DOCUMENTS_DELETE,
|
||||
Self::IndexesAdd => INDEXES_CREATE,
|
||||
Self::IndexesGet => INDEXES_GET,
|
||||
Self::IndexesUpdate => INDEXES_UPDATE,
|
||||
Self::IndexesDelete => INDEXES_DELETE,
|
||||
Self::TasksGet => TASKS_GET,
|
||||
Self::SettingsGet => SETTINGS_GET,
|
||||
Self::SettingsUpdate => SETTINGS_UPDATE,
|
||||
Self::StatsGet => STATS_GET,
|
||||
Self::DumpsCreate => DUMPS_CREATE,
|
||||
Self::DumpsGet => DUMPS_GET,
|
||||
Self::Version => VERSION,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod actions {
|
||||
pub const SEARCH: u8 = 1;
|
||||
pub const DOCUMENTS_ADD: u8 = 2;
|
||||
pub const DOCUMENTS_GET: u8 = 3;
|
||||
pub const DOCUMENTS_DELETE: u8 = 4;
|
||||
pub const INDEXES_CREATE: u8 = 5;
|
||||
pub const INDEXES_GET: u8 = 6;
|
||||
pub const INDEXES_UPDATE: u8 = 7;
|
||||
pub const INDEXES_DELETE: u8 = 8;
|
||||
pub const TASKS_GET: u8 = 9;
|
||||
pub const SETTINGS_GET: u8 = 10;
|
||||
pub const SETTINGS_UPDATE: u8 = 11;
|
||||
pub const STATS_GET: u8 = 12;
|
||||
pub const DUMPS_CREATE: u8 = 13;
|
||||
pub const DUMPS_GET: u8 = 14;
|
||||
pub const VERSION: u8 = 15;
|
||||
}
|
||||
44  meilisearch-auth/src/dump.rs  Normal file
@@ -0,0 +1,44 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::{AuthController, HeedAuthStore, Result};
|
||||
|
||||
const KEYS_PATH: &str = "keys";
|
||||
|
||||
impl AuthController {
|
||||
pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let store = HeedAuthStore::new(&src)?;
|
||||
|
||||
let keys_file_path = dst.as_ref().join(KEYS_PATH);
|
||||
|
||||
let keys = store.list_api_keys()?;
|
||||
let mut keys_file = File::create(&keys_file_path)?;
|
||||
for key in keys {
|
||||
serde_json::to_writer(&mut keys_file, &key)?;
|
||||
keys_file.write_all(b"\n")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let store = HeedAuthStore::new(&dst)?;
|
||||
|
||||
let keys_file_path = src.as_ref().join(KEYS_PATH);
|
||||
|
||||
if !keys_file_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut reader = BufReader::new(File::open(&keys_file_path)?).lines();
|
||||
while let Some(key) = reader.next().transpose()? {
|
||||
let key = serde_json::from_str(&key)?;
|
||||
store.put_api_key(key)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
46  meilisearch-auth/src/error.rs  Normal file
@@ -0,0 +1,46 @@
|
||||
use std::error::Error;
|
||||
|
||||
use meilisearch_error::ErrorCode;
|
||||
use meilisearch_error::{internal_error, Code};
|
||||
use serde_json::Value;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, AuthControllerError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum AuthControllerError {
|
||||
#[error("`{0}` field is mandatory.")]
|
||||
MissingParameter(&'static str),
|
||||
#[error("actions field value `{0}` is invalid. It should be an array of string representing action names.")]
|
||||
InvalidApiKeyActions(Value),
|
||||
#[error("indexes field value `{0}` is invalid. It should be an array of string representing index names.")]
|
||||
InvalidApiKeyIndexes(Value),
|
||||
#[error("expiresAt field value `{0}` is invalid. It should be in ISO-8601 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DDTHH:MM:SS'.")]
|
||||
InvalidApiKeyExpiresAt(Value),
|
||||
#[error("description field value `{0}` is invalid. It should be a string or specified as a null value.")]
|
||||
InvalidApiKeyDescription(Value),
|
||||
#[error("API key `{0}` not found.")]
|
||||
ApiKeyNotFound(String),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
AuthControllerError: heed::Error,
|
||||
std::io::Error,
|
||||
serde_json::Error,
|
||||
std::str::Utf8Error
|
||||
);
|
||||
|
||||
impl ErrorCode for AuthControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
Self::MissingParameter(_) => Code::MissingParameter,
|
||||
Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions,
|
||||
Self::InvalidApiKeyIndexes(_) => Code::InvalidApiKeyIndexes,
|
||||
Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt,
|
||||
Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription,
|
||||
Self::ApiKeyNotFound(_) => Code::ApiKeyNotFound,
|
||||
Self::Internal(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
161  meilisearch-auth/src/key.rs  Normal file
@@ -0,0 +1,161 @@
|
||||
use crate::action::Action;
|
||||
use crate::error::{AuthControllerError, Result};
|
||||
use crate::store::{KeyId, KEY_ID_LENGTH};
|
||||
use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{from_value, Value};
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct Key {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub description: Option<String>,
|
||||
pub id: KeyId,
|
||||
pub actions: Vec<Action>,
|
||||
pub indexes: Vec<String>,
|
||||
pub expires_at: Option<DateTime<Utc>>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Key {
|
||||
pub fn create_from_value(value: Value) -> Result<Self> {
|
||||
let description = value
|
||||
.get("description")
|
||||
.map(|des| {
|
||||
from_value(des.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let id = generate_id();
|
||||
|
||||
let actions = value
|
||||
.get("actions")
|
||||
.map(|act| {
|
||||
from_value(act.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyActions(act.clone()))
|
||||
})
|
||||
.ok_or(AuthControllerError::MissingParameter("actions"))??;
|
||||
|
||||
let indexes = value
|
||||
.get("indexes")
|
||||
.map(|ind| {
|
||||
from_value(ind.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyIndexes(ind.clone()))
|
||||
})
|
||||
.ok_or(AuthControllerError::MissingParameter("indexes"))??;
|
||||
|
||||
let expires_at = value
|
||||
.get("expiresAt")
|
||||
.map(parse_expiration_date)
|
||||
.ok_or(AuthControllerError::MissingParameter("expiresAt"))??;
|
||||
|
||||
let created_at = Utc::now();
|
||||
let updated_at = Utc::now();
|
||||
|
||||
Ok(Self {
|
||||
description,
|
||||
id,
|
||||
actions,
|
||||
indexes,
|
||||
expires_at,
|
||||
created_at,
|
||||
updated_at,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn update_from_value(&mut self, value: Value) -> Result<()> {
|
||||
if let Some(des) = value.get("description") {
|
||||
let des = from_value(des.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()));
|
||||
self.description = des?;
|
||||
}
|
||||
|
||||
if let Some(act) = value.get("actions") {
|
||||
let act = from_value(act.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyActions(act.clone()));
|
||||
self.actions = act?;
|
||||
}
|
||||
|
||||
if let Some(ind) = value.get("indexes") {
|
||||
let ind = from_value(ind.clone())
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyIndexes(ind.clone()));
|
||||
self.indexes = ind?;
|
||||
}
|
||||
|
||||
if let Some(exp) = value.get("expiresAt") {
|
||||
self.expires_at = parse_expiration_date(exp)?;
|
||||
}
|
||||
|
||||
self.updated_at = Utc::now();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn default_admin() -> Self {
|
||||
Self {
|
||||
description: Some("Default Admin API Key (Use it for all other operations. Caution! Do not use it on a public frontend)".to_string()),
|
||||
id: generate_id(),
|
||||
actions: vec![Action::All],
|
||||
indexes: vec!["*".to_string()],
|
||||
expires_at: None,
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn default_search() -> Self {
|
||||
Self {
|
||||
description: Some(
|
||||
"Default Search API Key (Use it to search from the frontend)".to_string(),
|
||||
),
|
||||
id: generate_id(),
|
||||
actions: vec![Action::Search],
|
||||
indexes: vec!["*".to_string()],
|
||||
expires_at: None,
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a printable key of 64 characters using thread_rng.
|
||||
fn generate_id() -> [u8; KEY_ID_LENGTH] {
|
||||
const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut bytes = [0; KEY_ID_LENGTH];
|
||||
for byte in bytes.iter_mut() {
|
||||
*byte = CHARSET[rng.gen_range(0..CHARSET.len())];
|
||||
}
|
||||
|
||||
bytes
|
||||
}
|
||||
|
||||
fn parse_expiration_date(value: &Value) -> Result<Option<DateTime<Utc>>> {
|
||||
match value {
|
||||
Value::String(string) => DateTime::parse_from_rfc3339(string)
|
||||
.map(|d| d.into())
|
||||
.or_else(|_| {
|
||||
NaiveDateTime::parse_from_str(string, "%Y-%m-%dT%H:%M:%S")
|
||||
.map(|naive| DateTime::from_utc(naive, Utc))
|
||||
})
|
||||
.or_else(|_| {
|
||||
NaiveDate::parse_from_str(string, "%Y-%m-%d")
|
||||
.map(|naive| DateTime::from_utc(naive.and_hms(0, 0, 0), Utc))
|
||||
})
|
||||
.map_err(|_| AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
|
||||
// check if the key is already expired.
|
||||
.and_then(|d| {
|
||||
if d > Utc::now() {
|
||||
Ok(d)
|
||||
} else {
|
||||
Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
|
||||
}
|
||||
})
|
||||
.map(Option::Some),
|
||||
Value::Null => Ok(None),
|
||||
_otherwise => Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())),
|
||||
}
|
||||
}
|
||||
151  meilisearch-auth/src/lib.rs  Normal file
@@ -0,0 +1,151 @@
|
||||
mod action;
|
||||
mod dump;
|
||||
pub mod error;
|
||||
mod key;
|
||||
mod store;
|
||||
|
||||
use std::path::Path;
|
||||
use std::str::from_utf8;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::Utc;
|
||||
use serde_json::Value;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
pub use action::{actions, Action};
|
||||
use error::{AuthControllerError, Result};
|
||||
pub use key::Key;
|
||||
use store::HeedAuthStore;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AuthController {
|
||||
store: Arc<HeedAuthStore>,
|
||||
master_key: Option<String>,
|
||||
}
|
||||
|
||||
impl AuthController {
|
||||
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
|
||||
let store = HeedAuthStore::new(db_path)?;
|
||||
|
||||
if store.is_empty()? {
|
||||
generate_default_keys(&store)?;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
store: Arc::new(store),
|
||||
master_key: master_key.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn create_key(&self, value: Value) -> Result<Key> {
|
||||
let key = Key::create_from_value(value)?;
|
||||
self.store.put_api_key(key)
|
||||
}
|
||||
|
||||
pub async fn update_key(&self, key: impl AsRef<str>, value: Value) -> Result<Key> {
|
||||
let mut key = self.get_key(key).await?;
|
||||
key.update_from_value(value)?;
|
||||
self.store.put_api_key(key)
|
||||
}
|
||||
|
||||
pub async fn get_key(&self, key: impl AsRef<str>) -> Result<Key> {
|
||||
self.store
|
||||
.get_api_key(&key)?
|
||||
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(key.as_ref().to_string()))
|
||||
}
|
||||
|
||||
pub fn get_key_filters(&self, key: impl AsRef<str>) -> Result<AuthFilter> {
|
||||
let mut filters = AuthFilter::default();
|
||||
if self
|
||||
.master_key
|
||||
.as_ref()
|
||||
.map_or(false, |master_key| master_key != key.as_ref())
|
||||
{
|
||||
let key = self
|
||||
.store
|
||||
.get_api_key(&key)?
|
||||
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(key.as_ref().to_string()))?;
|
||||
|
||||
if !key.indexes.iter().any(|i| i.as_str() == "*") {
|
||||
filters.indexes = Some(key.indexes);
|
||||
}
|
||||
|
||||
filters.allow_index_creation = key
|
||||
.actions
|
||||
.iter()
|
||||
.any(|&action| action == Action::IndexesAdd || action == Action::All);
|
||||
}
|
||||
|
||||
Ok(filters)
|
||||
}
|
||||
|
||||
pub async fn list_keys(&self) -> Result<Vec<Key>> {
|
||||
self.store.list_api_keys()
|
||||
}
|
||||
|
||||
pub async fn delete_key(&self, key: impl AsRef<str>) -> Result<()> {
|
||||
if self.store.delete_api_key(&key)? {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(AuthControllerError::ApiKeyNotFound(
|
||||
key.as_ref().to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_master_key(&self) -> Option<&String> {
|
||||
self.master_key.as_ref()
|
||||
}
|
||||
|
||||
pub fn authenticate(&self, token: &[u8], action: Action, index: Option<&[u8]>) -> Result<bool> {
|
||||
if let Some(master_key) = &self.master_key {
|
||||
if let Some((id, exp)) = self
|
||||
.store
|
||||
// check if the key has access to all indexes.
|
||||
.get_expiration_date(token, action, None)?
|
||||
.or(match index {
|
||||
// else check if the key has access to the requested index.
|
||||
Some(index) => self.store.get_expiration_date(token, action, Some(index))?,
|
||||
// or to any index if no index has been requested.
|
||||
None => self.store.prefix_first_expiration_date(token, action)?,
|
||||
})
|
||||
{
|
||||
let id = from_utf8(&id)?;
|
||||
if exp.map_or(true, |exp| Utc::now() < exp)
|
||||
&& generate_key(master_key.as_bytes(), id).as_bytes() == token
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AuthFilter {
|
||||
pub indexes: Option<Vec<String>>,
|
||||
pub allow_index_creation: bool,
|
||||
}
|
||||
|
||||
impl Default for AuthFilter {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
indexes: None,
|
||||
allow_index_creation: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_key(master_key: &[u8], uid: &str) -> String {
|
||||
let key = [uid.as_bytes(), master_key].concat();
|
||||
let sha = Sha256::digest(&key);
|
||||
format!("{}{:x}", uid, sha)
|
||||
}
|
||||
|
||||
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
||||
store.put_api_key(Key::default_admin())?;
|
||||
store.put_api_key(Key::default_search())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
236  meilisearch-auth/src/store.rs  Normal file
@@ -0,0 +1,236 @@
|
||||
use enum_iterator::IntoEnumIterator;
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Reverse;
|
||||
use std::convert::TryFrom;
|
||||
use std::convert::TryInto;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use heed::{Database, Env, EnvOpenOptions, RwTxn};
|
||||
|
||||
use super::error::Result;
|
||||
use super::{Action, Key};
|
||||
|
||||
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
||||
pub const KEY_ID_LENGTH: usize = 8;
|
||||
const AUTH_DB_PATH: &str = "auth";
|
||||
const KEY_DB_NAME: &str = "api-keys";
|
||||
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
|
||||
|
||||
pub type KeyId = [u8; KEY_ID_LENGTH];
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedAuthStore {
|
||||
env: Env,
|
||||
keys: Database<ByteSlice, SerdeJson<Key>>,
|
||||
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<DateTime<Utc>>>>,
|
||||
}
|
||||
|
||||
impl HeedAuthStore {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(AUTH_DB_PATH);
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(AUTH_STORE_SIZE); // 1GB
|
||||
options.max_dbs(2);
|
||||
let env = options.open(path)?;
|
||||
let keys = env.create_database(Some(KEY_DB_NAME))?;
|
||||
let action_keyid_index_expiration =
|
||||
env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
|
||||
Ok(Self {
|
||||
env,
|
||||
keys,
|
||||
action_keyid_index_expiration,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> Result<bool> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
Ok(self.keys.len(&rtxn)? == 0)
|
||||
}
|
||||
|
||||
pub fn put_api_key(&self, key: Key) -> Result<Key> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.keys.put(&mut wtxn, &key.id, &key)?;
|
||||
|
||||
let id = key.id;
|
||||
// delete key from inverted database before refilling it.
|
||||
self.delete_key_from_inverted_db(&mut wtxn, &id)?;
|
||||
// create inverted database.
|
||||
let db = self.action_keyid_index_expiration;
|
||||
|
||||
let actions = if key.actions.contains(&Action::All) {
|
||||
// if key.actions contains All, we iterate over all actions.
|
||||
Action::into_enum_iter().collect()
|
||||
} else {
|
||||
key.actions.clone()
|
||||
};
|
||||
|
||||
let no_index_restriction = key.indexes.contains(&"*".to_owned());
|
||||
for action in actions {
|
||||
if no_index_restriction {
|
||||
// If there is no index restriction we put None.
|
||||
db.put(&mut wtxn, &(&id, &action, None), &key.expires_at)?;
|
||||
} else {
|
||||
// else we create a key for each index.
|
||||
for index in key.indexes.iter() {
|
||||
db.put(
|
||||
&mut wtxn,
|
||||
&(&id, &action, Some(index.as_bytes())),
|
||||
&key.expires_at,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
pub fn get_api_key(&self, key: impl AsRef<str>) -> Result<Option<Key>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
match try_split_array_at::<_, KEY_ID_LENGTH>(key.as_ref().as_bytes()) {
|
||||
Some((id, _)) => self.keys.get(&rtxn, id).map_err(|e| e.into()),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_api_key(&self, key: impl AsRef<str>) -> Result<bool> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let existing = match try_split_array_at(key.as_ref().as_bytes()) {
|
||||
Some((id, _)) => {
|
||||
let existing = self.keys.delete(&mut wtxn, id)?;
|
||||
self.delete_key_from_inverted_db(&mut wtxn, id)?;
|
||||
existing
|
||||
}
|
||||
None => false,
|
||||
};
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(existing)
|
||||
}
|
||||
|
||||
pub fn list_api_keys(&self) -> Result<Vec<Key>> {
|
||||
let mut list = Vec::new();
|
||||
let rtxn = self.env.read_txn()?;
|
||||
for result in self.keys.remap_key_type::<DecodeIgnore>().iter(&rtxn)? {
|
||||
let (_, content) = result?;
|
||||
list.push(content);
|
||||
}
|
||||
list.sort_unstable_by_key(|k| Reverse(k.created_at));
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
pub fn get_expiration_date(
|
||||
&self,
|
||||
key: &[u8],
|
||||
action: Action,
|
||||
index: Option<&[u8]>,
|
||||
) -> Result<Option<(KeyId, Option<DateTime<Utc>>)>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
match try_split_array_at::<_, KEY_ID_LENGTH>(key) {
|
||||
Some((id, _)) => {
|
||||
let tuple = (id, &action, index);
|
||||
Ok(self
|
||||
.action_keyid_index_expiration
|
||||
.get(&rtxn, &tuple)?
|
||||
.map(|expiration| (*id, expiration)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn prefix_first_expiration_date(
|
||||
&self,
|
||||
key: &[u8],
|
||||
action: Action,
|
||||
) -> Result<Option<(KeyId, Option<DateTime<Utc>>)>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
match try_split_array_at::<_, KEY_ID_LENGTH>(key) {
|
||||
Some((id, _)) => {
|
||||
let tuple = (id, &action, None);
|
||||
Ok(self
|
||||
.action_keyid_index_expiration
|
||||
.prefix_iter(&rtxn, &tuple)?
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|(_, expiration)| (*id, expiration)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
|
||||
let mut iter = self
|
||||
.action_keyid_index_expiration
|
||||
.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
.prefix_iter_mut(wtxn, key)?;
|
||||
while iter.next().transpose()?.is_some() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Codec allowing to retrieve the expiration date of an action,
|
||||
/// optionally on a specific index, for a given key.
|
||||
pub struct KeyIdActionCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
type DItem = (KeyId, Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (key_id, action_bytes) = try_split_array_at(bytes)?;
|
||||
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
|
||||
(action, []) => (action, None),
|
||||
(action, index) => (action, Some(index)),
|
||||
};
|
||||
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
|
||||
|
||||
Some((*key_id, action, index))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
bytes.extend_from_slice(*key_id);
|
||||
let action_bytes = u8::to_be_bytes(action.repr());
|
||||
bytes.extend_from_slice(&action_bytes);
|
||||
if let Some(index) = index {
|
||||
bytes.extend_from_slice(index);
|
||||
}
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
/// Divides one slice into two at an index, returns `None` if mid is out of bounds.
|
||||
pub fn try_split_at<T>(slice: &[T], mid: usize) -> Option<(&[T], &[T])> {
|
||||
if mid <= slice.len() {
|
||||
Some(slice.split_at(mid))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Divides one slice into an array and the tail at an index,
|
||||
/// returns `None` if `N` is out of bounds.
|
||||
pub fn try_split_array_at<T, const N: usize>(slice: &[T]) -> Option<(&[T; N], &[T])>
|
||||
where
|
||||
[T; N]: for<'a> TryFrom<&'a [T]>,
|
||||
{
|
||||
let (head, tail) = try_split_at(slice, N)?;
|
||||
let head = head.try_into().ok()?;
|
||||
Some((head, tail))
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
[package]
|
||||
name = "meilisearch-core"
|
||||
version = "0.14.1"
|
||||
license = "MIT"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
arc-swap = "0.4.5"
|
||||
bincode = "1.2.1"
|
||||
byteorder = "1.3.4"
|
||||
chrono = { version = "0.4.11", features = ["serde"] }
|
||||
compact_arena = "0.4.0"
|
||||
cow-utils = "0.1.2"
|
||||
crossbeam-channel = "0.4.2"
|
||||
deunicode = "1.1.0"
|
||||
either = "1.5.3"
|
||||
env_logger = "0.7.1"
|
||||
fst = "0.4.4"
|
||||
hashbrown = { version = "0.7.1", features = ["serde"] }
|
||||
heed = "0.8.0"
|
||||
indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
intervaltree = "0.2.5"
|
||||
itertools = "0.9.0"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
log = "0.4.8"
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.14.1" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.14.1" }
|
||||
meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.14.1" }
|
||||
meilisearch-types = { path = "../meilisearch-types", version = "0.14.1" }
|
||||
once_cell = "1.3.1"
|
||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||
pest = { git = "https://github.com/MarinPostma/pest.git", tag = "meilisearch-patch1" }
|
||||
pest_derive = "2.0"
|
||||
regex = "1.3.6"
|
||||
sdset = "0.4.0"
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = { version = "1.0.50", features = ["preserve_order"] }
|
||||
slice-group-by = "0.2.6"
|
||||
unicase = "2.6.0"
|
||||
zerocopy = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.3.0"
|
||||
criterion = "0.3.1"
|
||||
csv = "1.1.3"
|
||||
rustyline = { version = "6.0.0", default-features = false }
|
||||
structopt = "0.3.12"
|
||||
tempfile = "3.1.0"
|
||||
termcolor = "1.1.0"
|
||||
|
||||
[target.'cfg(unix)'.dev-dependencies]
|
||||
jemallocator = "0.3.2"
|
||||
|
||||
[[bench]]
|
||||
name = "search_benchmark"
|
||||
harness = false
|
||||
@@ -1,108 +0,0 @@
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate assert_matches;
|
||||
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::iter;
|
||||
use std::path::Path;
|
||||
use std::sync::mpsc;
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions};
|
||||
use meilisearch_core::{ProcessedUpdateResult, UpdateStatus};
|
||||
use meilisearch_core::settings::{Settings, SettingsUpdate};
|
||||
use meilisearch_schema::Schema;
|
||||
use serde_json::Value;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
|
||||
|
||||
fn prepare_database(path: &Path) -> Database {
|
||||
let database = Database::open_or_create(path, DatabaseOptions::default()).unwrap();
|
||||
let db = &database;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("bench").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
db.main_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
index.main.put_schema(writer, &Schema::with_primary_key("id")).unwrap();
|
||||
Ok(())
|
||||
}).unwrap();
|
||||
|
||||
let settings_update: SettingsUpdate = {
|
||||
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/settings.json");
|
||||
let file = File::open(path).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
let settings: Settings = serde_json::from_reader(reader).unwrap();
|
||||
settings.to_update().unwrap()
|
||||
};
|
||||
|
||||
db.update_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
let _update_id = index.settings_update(writer, settings_update).unwrap();
|
||||
Ok(())
|
||||
}).unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let json: Value = {
|
||||
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/movies.json");
|
||||
let movies_file = File::open(path).expect("find movies");
|
||||
serde_json::from_reader(movies_file).unwrap()
|
||||
};
|
||||
|
||||
let documents = json.as_array().unwrap();
|
||||
|
||||
for document in documents {
|
||||
additions.update_document(document);
|
||||
}
|
||||
|
||||
let update_id = db.update_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
let update_id = additions.finalize(writer).unwrap();
|
||||
Ok(update_id)
|
||||
}).unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let update_reader = db.update_read_txn().unwrap();
|
||||
let result = index.update_status(&update_reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
database
|
||||
}
|
||||
|
||||
pub fn criterion_benchmark(c: &mut Criterion) {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let database = prepare_database(dir.path());
|
||||
|
||||
let reader = database.main_read_txn().unwrap();
|
||||
let index = database.open_index("bench").unwrap();
|
||||
|
||||
let mut count = 0;
|
||||
let query = "I love paris ";
|
||||
|
||||
let iter = iter::from_fn(|| {
|
||||
count += 1;
|
||||
query.get(0..count)
|
||||
});
|
||||
|
||||
let mut group = c.benchmark_group("searching in movies (19654 docs)");
|
||||
group.sample_size(10);
|
||||
|
||||
for query in iter {
|
||||
let bench_name = BenchmarkId::from_parameter(format!("{:?}", query));
|
||||
group.bench_with_input(bench_name, &query, |b, query| b.iter(|| {
|
||||
let builder = index.query_builder();
|
||||
builder.query(&reader, query, 0..20).unwrap();
|
||||
}));
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
||||
@@ -1,470 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::btree_map::{BTreeMap, Entry};
|
||||
use std::error::Error;
|
||||
use std::io::{Read, Write};
|
||||
use std::iter::FromIterator;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{fs, io, sync::mpsc};
|
||||
|
||||
use rustyline::{Config, Editor};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use structopt::StructOpt;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions, Highlight, ProcessedUpdateResult};
|
||||
use meilisearch_core::settings::Settings;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct IndexCommand {
|
||||
/// The destination where the database must be created.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
|
||||
/// The CSV file path to index; you can also use `-` to read from the standard input.
|
||||
#[structopt(parse(from_os_str))]
|
||||
csv_data_path: PathBuf,
|
||||
|
||||
/// The path to the settings.
|
||||
#[structopt(long, parse(from_os_str))]
|
||||
settings: PathBuf,
|
||||
|
||||
#[structopt(long)]
|
||||
update_group_size: Option<usize>,
|
||||
|
||||
#[structopt(long, parse(from_os_str))]
|
||||
compact_to_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct SearchCommand {
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
|
||||
/// Timeout after which the search will return results.
|
||||
#[structopt(long)]
|
||||
fetch_timeout_ms: Option<u64>,
|
||||
|
||||
/// The number of returned results
|
||||
#[structopt(short, long, default_value = "10")]
|
||||
number_results: usize,
|
||||
|
||||
/// The number of characters before and after the first match
|
||||
#[structopt(short = "C", long, default_value = "35")]
|
||||
char_context: usize,
|
||||
|
||||
/// A filter string that can be `!adult` or `adult` to
|
||||
/// filter documents on this specified field
|
||||
#[structopt(short, long)]
|
||||
filter: Option<String>,
|
||||
|
||||
/// Fields that must be displayed.
|
||||
displayed_fields: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct ShowUpdatesCommand {
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
enum Command {
|
||||
Index(IndexCommand),
|
||||
Search(SearchCommand),
|
||||
ShowUpdates(ShowUpdatesCommand),
|
||||
}
|
||||
|
||||
impl Command {
|
||||
fn path(&self) -> &Path {
|
||||
match self {
|
||||
Command::Index(command) => &command.database_path,
|
||||
Command::Search(command) => &command.database_path,
|
||||
Command::ShowUpdates(command) => &command.database_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
struct Document(indexmap::IndexMap<String, String>);
|
||||
|
||||
fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dyn Error>> {
|
||||
let start = Instant::now();
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn =
|
||||
move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
|
||||
let index = match database.open_index(&command.index_uid) {
|
||||
Some(index) => index,
|
||||
None => database.create_index(&command.index_uid).unwrap(),
|
||||
};
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let db = &database;
|
||||
|
||||
let settings = {
|
||||
let string = fs::read_to_string(&command.settings)?;
|
||||
let settings: Settings = serde_json::from_str(&string).unwrap();
|
||||
settings.to_update().unwrap()
|
||||
};
|
||||
|
||||
db.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
let mut rdr = if command.csv_data_path.as_os_str() == "-" {
|
||||
csv::Reader::from_reader(Box::new(io::stdin()) as Box<dyn Read>)
|
||||
} else {
|
||||
let file = std::fs::File::open(command.csv_data_path)?;
|
||||
csv::Reader::from_reader(Box::new(file) as Box<dyn Read>)
|
||||
};
|
||||
|
||||
let mut raw_record = csv::StringRecord::new();
|
||||
let headers = rdr.headers()?.clone();
|
||||
|
||||
let mut max_update_id = 0;
|
||||
let mut i = 0;
|
||||
let mut end_of_file = false;
|
||||
|
||||
while !end_of_file {
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
loop {
|
||||
end_of_file = !rdr.read_record(&mut raw_record)?;
|
||||
if end_of_file {
|
||||
break;
|
||||
}
|
||||
|
||||
let document: Document = match raw_record.deserialize(Some(&headers)) {
|
||||
Ok(document) => document,
|
||||
Err(e) => {
|
||||
eprintln!("{:?}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
additions.update_document(document);
|
||||
|
||||
print!("\rindexing document {}", i);
|
||||
i += 1;
|
||||
|
||||
if let Some(group_size) = command.update_group_size {
|
||||
if i % group_size == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
|
||||
let update_id = db.update_write(|w| additions.finalize(w))?;
|
||||
|
||||
println!("committing update...");
|
||||
max_update_id = max_update_id.max(update_id);
|
||||
println!("committed update {}", update_id);
|
||||
}
|
||||
|
||||
println!("Waiting for update {}", max_update_id);
|
||||
for id in receiver {
|
||||
if id == max_update_id {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
println!(
|
||||
"database created in {:.2?} at: {:?}",
|
||||
start.elapsed(),
|
||||
command.database_path
|
||||
);
|
||||
|
||||
if let Some(path) = command.compact_to_path {
|
||||
fs::create_dir_all(&path)?;
|
||||
let start = Instant::now();
|
||||
let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
|
||||
println!(
|
||||
"database compacted in {:.2?} at: {:?}",
|
||||
start.elapsed(),
|
||||
path
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
|
||||
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
||||
let mut highlighted = false;
|
||||
|
||||
for range in ranges.windows(2) {
|
||||
let [start, end] = match range {
|
||||
[start, end] => [*start, *end],
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if highlighted {
|
||||
stdout.set_color(
|
||||
ColorSpec::new()
|
||||
.set_fg(Some(Color::Yellow))
|
||||
.set_underline(true),
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "{}", &text[start..end])?;
|
||||
stdout.reset()?;
|
||||
highlighted = !highlighted;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
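/// Converts a character-based (index, length) pair into the corresponding (byte_index, byte_length) pair for `text`.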
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
|
||||
let mut byte_index = 0;
|
||||
let mut byte_length = 0;
|
||||
|
||||
for (n, (i, c)) in text.char_indices().enumerate() {
|
||||
if n == index {
|
||||
byte_index = i;
|
||||
}
|
||||
|
||||
if n + 1 == index + length {
|
||||
byte_length = i - byte_index + c.len_utf8();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(byte_index, byte_length)
|
||||
}
|
||||
|
||||
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
|
||||
let mut byte_indexes = BTreeMap::new();
|
||||
|
||||
for highlight in highlights {
|
||||
let char_index = highlight.char_index as usize;
|
||||
let char_length = highlight.char_length as usize;
|
||||
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
|
||||
|
||||
match byte_indexes.entry(byte_index) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(byte_length);
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
if *entry.get() < byte_length {
|
||||
entry.insert(byte_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut title_areas = Vec::new();
|
||||
title_areas.push(0);
|
||||
for (byte_index, length) in byte_indexes {
|
||||
title_areas.push(byte_index);
|
||||
title_areas.push(byte_index + length);
|
||||
}
|
||||
title_areas.push(text.len());
|
||||
title_areas.sort_unstable();
|
||||
title_areas
|
||||
}
|
||||
|
||||
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
|
||||
///
|
||||
/// ```no_run
|
||||
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
///
|
||||
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
|
||||
///
|
||||
/// let (text, matches) = crop_text(&text, matches, 35);
|
||||
/// ```
|
||||
fn crop_text(
|
||||
text: &str,
|
||||
highlights: impl IntoIterator<Item = Highlight>,
|
||||
context: usize,
|
||||
) -> (String, Vec<Highlight>) {
|
||||
let mut highlights = highlights.into_iter().peekable();
|
||||
|
||||
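// crop a window of `context * 2` characters starting `context` characters before the first highlight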
let char_index = highlights
|
||||
.peek()
|
||||
.map(|m| m.char_index as usize)
|
||||
.unwrap_or(0);
|
||||
let start = char_index.saturating_sub(context);
|
||||
let text = text.chars().skip(start).take(context * 2).collect();
|
||||
|
||||
let highlights = highlights
|
||||
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
|
||||
.map(|highlight| Highlight {
|
||||
char_index: highlight.char_index - start as u16,
|
||||
..highlight
|
||||
})
|
||||
.collect();
|
||||
|
||||
(text, highlights)
|
||||
}
|
||||
|
||||
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
|
||||
let db = &database;
|
||||
let index = database
|
||||
.open_index(&command.index_uid)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = db.main_read_txn().unwrap();
|
||||
let schema = index.main.schema(&reader)?;
|
||||
reader.abort().unwrap();
|
||||
|
||||
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
let fields = command.displayed_fields.iter().map(String::as_str);
|
||||
let fields = HashSet::from_iter(fields);
|
||||
|
||||
let config = Config::builder().auto_add_history(true).build();
|
||||
let mut readline = Editor::<()>::with_config(config);
|
||||
let _ = readline.load_history("query-history.txt");
|
||||
|
||||
for result in readline.iter("Searching for: ") {
|
||||
match result {
|
||||
Ok(query) => {
|
||||
let start_total = Instant::now();
|
||||
|
||||
let reader = db.main_read_txn().unwrap();
|
||||
let ref_index = &index;
|
||||
let ref_reader = &reader;
|
||||
|
||||
let mut builder = index.query_builder();
|
||||
if let Some(timeout) = command.fetch_timeout_ms {
|
||||
builder.with_fetch_timeout(Duration::from_millis(timeout));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = command.filter {
|
||||
let filter = filter.as_str();
|
||||
let (positive, filter) = if filter.chars().next() == Some('!') {
|
||||
(false, &filter[1..])
|
||||
} else {
|
||||
(true, filter)
|
||||
};
|
||||
|
||||
let attr = schema
|
||||
.id(filter)
|
||||
.expect("Could not find filtered attribute");
|
||||
|
||||
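// keep only documents whose boolean field matches the (possibly negated) filter value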
builder.with_filter(move |document_id| {
|
||||
let string: String = ref_index
|
||||
.document_attribute(ref_reader, document_id, attr)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
(string == "true") == positive
|
||||
});
|
||||
}
|
||||
|
||||
let result = builder.query(ref_reader, Some(&query), 0..command.number_results)?;
|
||||
|
||||
let mut retrieve_duration = Duration::default();
|
||||
|
||||
let number_of_documents = result.documents.len();
|
||||
for mut doc in result.documents {
|
||||
doc.highlights
|
||||
.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
|
||||
let start_retrieve = Instant::now();
|
||||
let result = index.document::<Document>(&reader, Some(&fields), doc.id);
|
||||
retrieve_duration += start_retrieve.elapsed();
|
||||
|
||||
match result {
|
||||
Ok(Some(document)) => {
|
||||
println!("raw-id: {:?}", doc.id);
|
||||
for (name, text) in document.0 {
|
||||
print!("{}: ", name);
|
||||
|
||||
let attr = schema.id(&name).unwrap();
|
||||
let highlights = doc
|
||||
.highlights
|
||||
.iter()
|
||||
.filter(|m| FieldId::new(m.attribute) == attr)
|
||||
.cloned();
|
||||
let (text, highlights) =
|
||||
crop_text(&text, highlights, command.char_context);
|
||||
let areas = create_highlight_areas(&text, &highlights);
|
||||
display_highlights(&text, &areas)?;
|
||||
println!();
|
||||
}
|
||||
}
|
||||
Ok(None) => eprintln!("missing document"),
|
||||
Err(e) => eprintln!("{}", e),
|
||||
}
|
||||
|
||||
let mut matching_attributes = HashSet::new();
|
||||
for highlight in doc.highlights {
|
||||
let attr = FieldId::new(highlight.attribute);
|
||||
let name = schema.name(attr);
|
||||
matching_attributes.insert(name);
|
||||
}
|
||||
|
||||
let matching_attributes = Vec::from_iter(matching_attributes);
|
||||
println!("matching in: {:?}", matching_attributes);
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"whole documents fields retrieve took {:.2?}",
|
||||
retrieve_duration
|
||||
);
|
||||
eprintln!(
|
||||
"===== Found {} results in {:.2?} =====",
|
||||
number_of_documents,
|
||||
start_total.elapsed()
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Error: {:?}", err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readline.save_history("query-history.txt").unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn show_updates_command(
|
||||
command: ShowUpdatesCommand,
|
||||
database: Database,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
let db = &database;
|
||||
let index = database
|
||||
.open_index(&command.index_uid)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = db.update_read_txn().unwrap();
|
||||
let updates = index.all_updates_status(&reader)?;
|
||||
println!("{:#?}", updates);
|
||||
reader.abort().unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let opt = Command::from_args();
|
||||
let database = Database::open_or_create(opt.path(), DatabaseOptions::default())?;
|
||||
|
||||
match opt {
|
||||
Command::Index(command) => index_command(command, database),
|
||||
Command::Search(command) => search_command(command, database),
|
||||
Command::ShowUpdates(command) => show_updates_command(command, database),
|
||||
}
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum PrefixSetting {
|
||||
Prefix,
|
||||
NoPrefix,
|
||||
}
|
||||
|
||||
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
|
||||
use PrefixSetting::{NoPrefix, Prefix};
|
||||
|
||||
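// the allowed typo count grows with the query length: 0 typos up to 4 bytes, 1 typo for 5 to 8 bytes, 2 typos beyond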
match query.len() {
|
||||
0..=4 => {
|
||||
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
5..=8 => {
|
||||
let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prefix_dfa(query: &str) -> DFA {
|
||||
build_dfa_with_setting(query, PrefixSetting::Prefix)
|
||||
}
|
||||
|
||||
pub fn build_dfa(query: &str) -> DFA {
|
||||
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
|
||||
}
|
||||
|
||||
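/// Builds a DFA that only accepts the query exactly (zero typos, no prefix matching).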
pub fn build_exact_dfa(query: &str) -> DFA {
|
||||
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||
builder.build_dfa(query)
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
mod dfa;
|
||||
|
||||
use meilisearch_tokenizer::is_cjk;
|
||||
|
||||
pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
|
||||
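/// Lowercases the string and, if it contains no CJK characters, transliterates it to plain ASCII.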
pub fn normalize_str(string: &str) -> String {
|
||||
let mut string = string.to_lowercase();
|
||||
|
||||
if !string.contains(is_cjk) {
|
||||
string = deunicode::deunicode_with_tofu(&string, "");
|
||||
}
|
||||
|
||||
string
|
||||
}
|
||||
@@ -1,661 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
use std::ops::Deref;
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::Instant;
|
||||
use std::fmt;
|
||||
|
||||
use compact_arena::{SmallArena, Idx32, mk_arena};
|
||||
use log::{debug, error};
|
||||
use sdset::{Set, SetBuf, exponential_search, SetOperation, Counter, duo::OpBuilder};
|
||||
use slice_group_by::{GroupBy, GroupByMut};
|
||||
|
||||
use meilisearch_types::DocIndex;
|
||||
|
||||
use crate::criterion::{Criteria, Context, ContextMut};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::raw_document::RawDocument;
|
||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||
use crate::{store, Document, DocumentId, MResult, Index, RankedMap, MainReader, Error};
|
||||
use crate::query_tree::{create_query_tree, traverse_query_tree};
|
||||
use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey};
|
||||
use crate::query_tree::Context as QTContext;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SortResult {
|
||||
pub documents: Vec<Document>,
|
||||
pub nb_hits: usize,
|
||||
pub exhaustive_nb_hit: bool,
|
||||
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||
pub exhaustive_facets_count: Option<bool>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn bucket_sort<'c, FI>(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
index: &Index,
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
{
|
||||
// We delegate the filter work to the distinct query builder,
|
||||
// specifying a distinct rule that has no effect.
|
||||
if filter.is_some() {
|
||||
let distinct = |_| None;
|
||||
let distinct_size = 1;
|
||||
return bucket_sort_with_distinct(
|
||||
reader,
|
||||
query,
|
||||
range,
|
||||
facets_docids,
|
||||
facet_count_docids,
|
||||
filter,
|
||||
distinct,
|
||||
distinct_size,
|
||||
criteria,
|
||||
searchable_attrs,
|
||||
index,
|
||||
);
|
||||
}
|
||||
|
||||
let mut result = SortResult::default();
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
|
||||
let context = QTContext {
|
||||
words_set,
|
||||
stop_words,
|
||||
synonyms: index.synonyms,
|
||||
postings_lists: index.postings_lists,
|
||||
prefix_postings_lists: index.prefix_postings_lists_cache,
|
||||
};
|
||||
|
||||
let (operation, mapping) = create_query_tree(reader, &context, query)?;
|
||||
debug!("operation:\n{:?}", operation);
|
||||
debug!("mapping:\n{:?}", mapping);
|
||||
|
||||
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
|
||||
match operation {
|
||||
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Query(query) => { map.insert(query.id, &query.kind); },
|
||||
}
|
||||
}
|
||||
|
||||
let mut queries_kinds = HashMap::new();
|
||||
recurs_operation(&mut queries_kinds, &operation);
|
||||
|
||||
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||
debug!("found {} documents", docids.len());
|
||||
debug!("number of postings {:?}", queries.len());
|
||||
|
||||
if let Some(facets_docids) = facets_docids {
|
||||
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
.intersection()
|
||||
.into_set_buf();
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value until the approximation optimization is implemented
|
||||
result.exhaustive_facets_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
debug!("matches cleaned in {:.02?}", before.elapsed());
|
||||
|
||||
let before_bucket_sort = Instant::now();
|
||||
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut raw_documents = Vec::new();
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
debug!("creating {} candidates documents took {:.02?}",
|
||||
raw_documents.len(),
|
||||
before_raw_documents_building.elapsed(),
|
||||
);
|
||||
|
||||
let before_criterion_loop = Instant::now();
|
||||
let proximity_count = AtomicUsize::new(0);
|
||||
|
||||
let mut groups = vec![raw_documents.as_mut_slice()];
|
||||
|
||||
'criteria: for criterion in criteria.as_ref() {
|
||||
let tmp_groups = mem::replace(&mut groups, Vec::new());
|
||||
let mut documents_seen = 0;
|
||||
|
||||
for mut group in tmp_groups {
|
||||
let before_criterion_preparation = Instant::now();
|
||||
|
||||
let ctx = ContextMut {
|
||||
reader,
|
||||
postings_lists: &mut arena,
|
||||
query_mapping: &mapping,
|
||||
documents_fields_counts_store: index.documents_fields_counts,
|
||||
};
|
||||
|
||||
criterion.prepare(ctx, &mut group)?;
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_mapping: &mapping,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
debug!("{:?} produced a group of size {}", criterion.name(), group.len());
|
||||
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
|
||||
// we have sorted enough documents if the last sorted document is past
|
||||
// the end of the requested range, so we can continue to the next criterion
|
||||
if documents_seen >= range.end {
|
||||
continue 'criteria;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("criterion loop took {:.02?}", before_criterion_loop.elapsed());
|
||||
debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed));
|
||||
|
||||
let schema = index.main.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
let iter = raw_documents.into_iter().skip(range.start).take(range.len());
|
||||
let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
|
||||
let documents = iter.collect();
|
||||
|
||||
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
|
||||
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
distinct: FD,
|
||||
distinct_size: usize,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
index: &Index,
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
|
||||
let context = QTContext {
|
||||
words_set,
|
||||
stop_words,
|
||||
synonyms: index.synonyms,
|
||||
postings_lists: index.postings_lists,
|
||||
prefix_postings_lists: index.prefix_postings_lists_cache,
|
||||
};
|
||||
|
||||
let (operation, mapping) = create_query_tree(reader, &context, query)?;
|
||||
debug!("operation:\n{:?}", operation);
|
||||
debug!("mapping:\n{:?}", mapping);
|
||||
|
||||
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
|
||||
match operation {
|
||||
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Query(query) => { map.insert(query.id, &query.kind); },
|
||||
}
|
||||
}
|
||||
|
||||
let mut queries_kinds = HashMap::new();
|
||||
recurs_operation(&mut queries_kinds, &operation);
|
||||
|
||||
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||
debug!("found {} documents", docids.len());
|
||||
debug!("number of postings {:?}", queries.len());
|
||||
|
||||
if let Some(facets_docids) = facets_docids {
|
||||
let intersection = OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
.intersection()
|
||||
.into_set_buf();
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value until the approximation optimization is implemented
|
||||
result.exhaustive_facets_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
debug!("matches cleaned in {:.02?}", before.elapsed());
|
||||
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut raw_documents = Vec::new();
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
debug!("creating {} candidates documents took {:.02?}",
|
||||
raw_documents.len(),
|
||||
before_raw_documents_building.elapsed(),
|
||||
);
|
||||
|
||||
let mut groups = vec![raw_documents.as_mut_slice()];
|
||||
let mut key_cache = HashMap::new();
|
||||
|
||||
let mut filter_map = HashMap::new();
|
||||
// these two variables describe the current distinct map and
|
||||
// the raw offset of the start of the group where the
|
||||
// range.start bound is located according to the distinct function
|
||||
let mut distinct_map = DistinctMap::new(distinct_size);
|
||||
let mut distinct_raw_offset = 0;
|
||||
|
||||
'criteria: for criterion in criteria.as_ref() {
|
||||
let tmp_groups = mem::replace(&mut groups, Vec::new());
|
||||
let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
|
||||
let mut documents_seen = 0;
|
||||
|
||||
for mut group in tmp_groups {
|
||||
// if this group does not overlap with the requested range,
|
||||
// push it without sorting or splitting it
|
||||
if documents_seen + group.len() < distinct_raw_offset {
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
continue;
|
||||
}
|
||||
|
||||
let ctx = ContextMut {
|
||||
reader,
|
||||
postings_lists: &mut arena,
|
||||
query_mapping: &mapping,
|
||||
documents_fields_counts_store: index.documents_fields_counts,
|
||||
};
|
||||
|
||||
let before_criterion_preparation = Instant::now();
|
||||
criterion.prepare(ctx, &mut group)?;
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_mapping: &mapping,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
// we must compute the real distinct length of this sub-group
|
||||
for document in group.iter() {
|
||||
let filter_accepted = match &filter {
|
||||
Some(filter) => {
|
||||
let entry = filter_map.entry(document.id);
|
||||
*entry.or_insert_with(|| (filter)(document.id))
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let entry = key_cache.entry(document.id);
|
||||
let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
|
||||
|
||||
match key.clone() {
|
||||
Some(key) => buf_distinct.register(key),
|
||||
None => buf_distinct.register_without_key(),
|
||||
};
|
||||
}
|
||||
|
||||
// the requested range end is reached: stop computing distinct
|
||||
if buf_distinct.len() >= range.end {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
|
||||
// if this sub-group does not overlap with the requested range
|
||||
// we must update the distinct map and its start index
|
||||
if buf_distinct.len() < range.start {
|
||||
buf_distinct.transfert_to_internal();
|
||||
distinct_raw_offset = documents_seen;
|
||||
}
|
||||
|
||||
// we have sorted enough documents if the last sorted document is past
|
||||
// the end of the requested range, so we can continue to the next criterion
|
||||
if buf_distinct.len() >= range.end {
|
||||
continue 'criteria;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// once we have classified the documents related to the current
|
||||
// automatons, we save that as the next valid result
|
||||
let mut seen = BufferedDistinctMap::new(&mut distinct_map);
|
||||
let schema = index.main.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let mut documents = Vec::with_capacity(range.len());
|
||||
for raw_document in raw_documents.into_iter().skip(distinct_raw_offset) {
|
||||
let filter_accepted = match &filter {
|
||||
Some(_) => filter_map.remove(&raw_document.id).unwrap_or_else(|| {
|
||||
error!("error during filtering: expected value for document id {}", &raw_document.id.0);
|
||||
Default::default()
|
||||
}),
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let key = key_cache.remove(&raw_document.id).unwrap_or_else(|| {
|
||||
error!("error during distinct: expected value for document id {}", &raw_document.id.0);
|
||||
Default::default()
|
||||
});
|
||||
let distinct_accepted = match key {
|
||||
Some(key) => seen.register(key),
|
||||
None => seen.register_without_key(),
|
||||
};
|
||||
|
||||
if distinct_accepted && seen.len() > range.start {
|
||||
documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
|
||||
if documents.len() == range.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn cleanup_bare_matches<'tag, 'txn>(
|
||||
arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
docids: &Set<DocumentId>,
|
||||
queries: HashMap<PostingsKey, Cow<'txn, Set<DocIndex>>>,
|
||||
) -> Vec<BareMatch<'tag>>
|
||||
{
|
||||
let docidslen = docids.len() as f32;
|
||||
let mut bare_matches = Vec::new();
|
||||
|
||||
for (PostingsKey { query, input, distance, is_exact }, matches) in queries {
|
||||
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
|
||||
let pllen = postings_list_view.len() as f32;
|
||||
|
||||
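// when the candidate docids cover most of the postings list, scan the list linearly; otherwise exponential-search each docid in it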
if docidslen / pllen >= 0.8 {
|
||||
let mut offset = 0;
|
||||
for matches in postings_list_view.linear_group_by_key(|m| m.document_id) {
|
||||
let document_id = matches[0].document_id;
|
||||
if docids.contains(&document_id) {
|
||||
let range = postings_list_view.range(offset, matches.len());
|
||||
let posting_list_index = arena.add(range);
|
||||
|
||||
let bare_match = BareMatch {
|
||||
document_id,
|
||||
query_index: query.id,
|
||||
distance,
|
||||
is_exact,
|
||||
postings_list: posting_list_index,
|
||||
};
|
||||
|
||||
bare_matches.push(bare_match);
|
||||
}
|
||||
|
||||
offset += matches.len();
|
||||
}
|
||||
|
||||
} else {
|
||||
let mut offset = 0;
|
||||
for id in docids.as_slice() {
|
||||
let di = DocIndex { document_id: *id, ..DocIndex::default() };
|
||||
let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x);
|
||||
|
||||
offset += pos;
|
||||
|
||||
let group = postings_list_view[offset..]
|
||||
.linear_group_by_key(|m| m.document_id)
|
||||
.next()
|
||||
.filter(|matches| matches[0].document_id == *id);
|
||||
|
||||
if let Some(matches) = group {
|
||||
let range = postings_list_view.range(offset, matches.len());
|
||||
let posting_list_index = arena.add(range);
|
||||
|
||||
let bare_match = BareMatch {
|
||||
document_id: *id,
|
||||
query_index: query.id,
|
||||
distance,
|
||||
is_exact,
|
||||
postings_list: posting_list_index,
|
||||
};
|
||||
|
||||
bare_matches.push(bare_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let before_raw_documents_presort = Instant::now();
|
||||
bare_matches.sort_unstable_by_key(|sm| sm.document_id);
|
||||
debug!("sort by documents ids took {:.02?}", before_raw_documents_presort.elapsed());
|
||||
|
||||
bare_matches
|
||||
}
|
||||
|
||||
pub struct BareMatch<'tag> {
|
||||
pub document_id: DocumentId,
|
||||
pub query_index: usize,
|
||||
pub distance: u8,
|
||||
pub is_exact: bool,
|
||||
pub postings_list: Idx32<'tag>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for BareMatch<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("BareMatch")
|
||||
.field("document_id", &self.document_id)
|
||||
.field("query_index", &self.query_index)
|
||||
.field("distance", &self.distance)
|
||||
.field("is_exact", &self.is_exact)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct SimpleMatch {
|
||||
pub query_index: usize,
|
||||
pub distance: u8,
|
||||
pub attribute: u16,
|
||||
pub word_index: u16,
|
||||
pub is_exact: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum PostingsListView<'txn> {
|
||||
Original {
|
||||
input: Rc<[u8]>,
|
||||
postings_list: Rc<Cow<'txn, Set<DocIndex>>>,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
},
|
||||
Rewritten {
|
||||
input: Rc<[u8]>,
|
||||
postings_list: SetBuf<DocIndex>,
|
||||
},
|
||||
}
|
||||
|
||||
impl fmt::Debug for PostingsListView<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("PostingsListView")
|
||||
.field("input", &std::str::from_utf8(&self.input()).unwrap())
|
||||
.field("postings_list", &self.as_ref())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'txn> PostingsListView<'txn> {
|
||||
pub fn original(input: Rc<[u8]>, postings_list: Rc<Cow<'txn, Set<DocIndex>>>) -> PostingsListView<'txn> {
|
||||
let len = postings_list.len();
|
||||
PostingsListView::Original { input, postings_list, offset: 0, len }
|
||||
}
|
||||
|
||||
pub fn rewritten(input: Rc<[u8]>, postings_list: SetBuf<DocIndex>) -> PostingsListView<'txn> {
|
||||
PostingsListView::Rewritten { input, postings_list }
|
||||
}
|
||||
|
||||
pub fn rewrite_with(&mut self, postings_list: SetBuf<DocIndex>) {
|
||||
let input = match self {
|
||||
PostingsListView::Original { input, .. } => input.clone(),
|
||||
PostingsListView::Rewritten { input, .. } => input.clone(),
|
||||
};
|
||||
*self = PostingsListView::rewritten(input, postings_list);
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
PostingsListView::Original { len, .. } => *len,
|
||||
PostingsListView::Rewritten { postings_list, .. } => postings_list.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn input(&self) -> &[u8] {
|
||||
match self {
|
||||
PostingsListView::Original { ref input, .. } => input,
|
||||
PostingsListView::Rewritten { ref input, .. } => input,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn range(&self, range_offset: usize, range_len: usize) -> PostingsListView<'txn> {
|
||||
match self {
|
||||
PostingsListView::Original { input, postings_list, offset, len } => {
|
||||
assert!(range_offset + range_len <= *len);
|
||||
PostingsListView::Original {
|
||||
input: input.clone(),
|
||||
postings_list: postings_list.clone(),
|
||||
offset: offset + range_offset,
|
||||
len: range_len,
|
||||
}
|
||||
},
|
||||
PostingsListView::Rewritten { .. } => {
|
||||
panic!("Cannot create a range on a rewritten postings list view");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Set<DocIndex>> for PostingsListView<'_> {
|
||||
fn as_ref(&self) -> &Set<DocIndex> {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for PostingsListView<'_> {
|
||||
type Target = Set<DocIndex>;
|
||||
|
||||
fn deref(&self) -> &Set<DocIndex> {
|
||||
match *self {
|
||||
PostingsListView::Original { ref postings_list, offset, len, .. } => {
|
||||
Set::new_unchecked(&postings_list[offset..offset + len])
|
||||
},
|
||||
PostingsListView::Rewritten { ref postings_list, .. } => postings_list,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sorts document ids according to user-defined ranking rules.
|
||||
pub fn placeholder_document_sort(
|
||||
document_ids: &mut [DocumentId],
|
||||
index: &store::Index,
|
||||
reader: &MainReader,
|
||||
ranked_map: &RankedMap
|
||||
) -> MResult<()> {
|
||||
use crate::settings::RankingRule;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
enum SortOrder {
|
||||
Asc,
|
||||
Desc,
|
||||
}
|
||||
|
||||
if let Some(ranking_rules) = index.main.ranking_rules(reader)? {
|
||||
let schema = index.main.schema(reader)?
|
||||
.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
// Select the custom rules among the ranking rules, and map them to rules
|
||||
// containing a field_id
|
||||
let ranking_rules = ranking_rules.iter().filter_map(|r|
|
||||
match r {
|
||||
RankingRule::Asc(name) => schema.id(name).map(|f| (f, SortOrder::Asc)),
|
||||
RankingRule::Desc(name) => schema.id(name).map(|f| (f, SortOrder::Desc)),
|
||||
_ => None,
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
document_ids.sort_unstable_by(|a, b| {
|
||||
for (field_id, order) in &ranking_rules {
|
||||
let a_value = ranked_map.get(*a, *field_id);
|
||||
let b_value = ranked_map.get(*b, *field_id);
|
||||
let (a, b) = match order {
|
||||
SortOrder::Asc => (a_value, b_value),
|
||||
SortOrder::Desc => (b_value, a_value),
|
||||
};
|
||||
match a.cmp(&b) {
|
||||
Ordering::Equal => continue,
|
||||
ordering => return ordering,
|
||||
}
|
||||
}
|
||||
Ordering::Equal
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||
pub fn facet_count(
|
||||
facet_docids: HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>,
|
||||
candidate_docids: &Set<DocumentId>,
|
||||
) -> HashMap<String, HashMap<String, usize>> {
|
||||
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||
for (key, doc_map) in facet_docids {
|
||||
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||
for (_, (value, docids)) in doc_map {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
count_map.insert(value.to_string(), counter.0);
|
||||
}
|
||||
facets_counts.insert(key, count_map);
|
||||
}
|
||||
facets_counts
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
pub struct Attribute;
|
||||
|
||||
impl Criterion for Attribute {
|
||||
fn name(&self) -> &str { "attribute" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
|
||||
let mut sum_of_attribute = 0;
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_of_attribute += group[0].attribute as usize;
|
||||
}
|
||||
sum_of_attribute
|
||||
}
|
||||
|
||||
let lhs = sum_of_attribute(&lhs.processed_matches);
|
||||
let rhs = sum_of_attribute(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::RawDocument;
|
||||
use super::{Criterion, Context};
|
||||
|
||||
pub struct DocumentId;
|
||||
|
||||
impl Criterion for DocumentId {
|
||||
fn name(&self) -> &str { "stable document id" }
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = &lhs.id;
|
||||
let rhs = &rhs.id;
|
||||
|
||||
lhs.cmp(rhs)
|
||||
}
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::hash_map::{HashMap, Entry};
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::BareMatch;
|
||||
use super::{Criterion, Context, ContextMut};
|
||||
|
||||
pub struct Exactness;
|
||||
|
||||
impl Criterion for Exactness {
|
||||
fn name(&self) -> &str { "exactness" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
let store = ctx.documents_fields_counts_store;
|
||||
let reader = ctx.reader;
|
||||
|
||||
'documents: for doc in documents {
|
||||
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||
|
||||
// mark the document if we find a "one word field" that matches
|
||||
let mut fields_counts = HashMap::new();
|
||||
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
||||
if !group[0].is_exact { break }
|
||||
|
||||
for bm in group {
|
||||
for di in ctx.postings_lists[bm.postings_list].as_ref() {
|
||||
|
||||
let attr = IndexedPos(di.attribute);
|
||||
let count = match fields_counts.entry(attr) {
|
||||
Entry::Occupied(entry) => *entry.get(),
|
||||
Entry::Vacant(entry) => {
|
||||
let count = store.document_field_count(reader, doc.id, attr)?;
|
||||
*entry.insert(count)
|
||||
},
|
||||
};
|
||||
|
||||
if count == Some(1) {
|
||||
doc.contains_one_word_field = true;
|
||||
continue 'documents
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
|
||||
let mut sum_exact_query_words = 0;
|
||||
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_exact_query_words += group[0].is_exact as usize;
|
||||
}
|
||||
|
||||
sum_exact_query_words
|
||||
}
|
||||
|
||||
// does it contain a "one word field"?
|
||||
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
||||
// if not, which document contains the most exact words
|
||||
.then_with(|| {
|
||||
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
||||
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
||||
lhs.cmp(&rhs).reverse()
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,292 +0,0 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
use std::collections::HashMap;
|
||||
use std::ops::Range;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use sdset::SetBuf;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::bucket_sort::{SimpleMatch, PostingsListView};
|
||||
use crate::database::MainT;
|
||||
use crate::query_tree::QueryId;
|
||||
use crate::{store, RawDocument, MResult};
|
||||
|
||||
mod typo;
|
||||
mod words;
|
||||
mod proximity;
|
||||
mod attribute;
|
||||
mod words_position;
|
||||
mod exactness;
|
||||
mod document_id;
|
||||
mod sort_by_attr;
|
||||
|
||||
pub use self::typo::Typo;
|
||||
pub use self::words::Words;
|
||||
pub use self::proximity::Proximity;
|
||||
pub use self::attribute::Attribute;
|
||||
pub use self::words_position::WordsPosition;
|
||||
pub use self::exactness::Exactness;
|
||||
pub use self::document_id::DocumentId;
|
||||
pub use self::sort_by_attr::SortByAttr;
|
||||
|
||||
pub trait Criterion {
|
||||
fn name(&self) -> &str;
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
_ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
_documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate<'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> Ordering;
|
||||
|
||||
#[inline]
|
||||
fn eq<'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> bool
|
||||
{
|
||||
self.evaluate(ctx, lhs, rhs) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ContextMut<'h, 'p, 'tag, 'txn, 'q> {
|
||||
pub reader: &'h heed::RoTxn<MainT>,
|
||||
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
|
||||
pub documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
}
|
||||
|
||||
pub struct Context<'p, 'tag, 'txn, 'q> {
|
||||
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CriteriaBuilder<'a> {
|
||||
inner: Vec<Box<dyn Criterion + 'a>>,
|
||||
}
|
||||
|
||||
impl<'a> CriteriaBuilder<'a> {
|
||||
pub fn new() -> CriteriaBuilder<'a> {
|
||||
CriteriaBuilder { inner: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
|
||||
CriteriaBuilder {
|
||||
inner: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reserve(&mut self, additional: usize) {
|
||||
self.inner.reserve(additional)
|
||||
}
|
||||
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
|
||||
where
|
||||
C: Criterion,
|
||||
{
|
||||
self.push(criterion);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<C: 'a>(&mut self, criterion: C)
|
||||
where
|
||||
C: Criterion,
|
||||
{
|
||||
self.inner.push(Box::new(criterion));
|
||||
}
|
||||
|
||||
pub fn build(self) -> Criteria<'a> {
|
||||
Criteria { inner: self.inner }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Criteria<'a> {
|
||||
inner: Vec<Box<dyn Criterion + 'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Default for Criteria<'a> {
|
||||
fn default() -> Self {
|
||||
CriteriaBuilder::with_capacity(7)
|
||||
.add(Typo)
|
||||
.add(Words)
|
||||
.add(Proximity)
|
||||
.add(Attribute)
|
||||
.add(WordsPosition)
|
||||
.add(Exactness)
|
||||
.add(DocumentId)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
|
||||
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
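// keeps, for every query id of the mapping, the smallest typo distance found among the document's matches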
fn prepare_query_distances<'a, 'tag, 'txn>(
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) {
|
||||
for document in documents {
|
||||
if !document.processed_distances.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.bare_matches.iter() {
|
||||
if postings_lists[m.postings_list].is_empty() { continue }
|
||||
|
||||
let range = query_mapping[&(m.query_index as usize)].clone();
|
||||
let new_len = cmp::max(range.end as usize, processed.len());
|
||||
processed.resize(new_len, None);
|
||||
|
||||
for index in range {
|
||||
let index = index as usize;
|
||||
processed[index] = match processed[index] {
|
||||
Some(distance) if distance > m.distance => Some(m.distance),
|
||||
Some(distance) => Some(distance),
|
||||
None => Some(m.distance),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
document.processed_distances = processed;
|
||||
}
|
||||
}
|
||||
|
||||
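// flattens each document's bare matches into SimpleMatches and applies the multiword rewrite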
fn prepare_bare_matches<'a, 'tag, 'txn>(
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
) {
|
||||
for document in documents {
|
||||
if !document.processed_matches.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.bare_matches.iter() {
|
||||
let postings_list = &postings_lists[m.postings_list];
|
||||
processed.reserve(postings_list.len());
|
||||
for di in postings_list.as_ref() {
|
||||
let simple_match = SimpleMatch {
|
||||
query_index: m.query_index,
|
||||
distance: m.distance,
|
||||
attribute: di.attribute,
|
||||
word_index: di.word_index,
|
||||
is_exact: m.is_exact,
|
||||
};
|
||||
processed.push(simple_match);
|
||||
}
|
||||
}
|
||||
|
||||
let processed = multiword_rewrite_matches(&mut processed, query_mapping);
|
||||
document.processed_matches = processed.into_vec();
|
||||
}
|
||||
}
|
||||
|
||||
fn multiword_rewrite_matches(
|
||||
matches: &mut [SimpleMatch],
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
) -> SetBuf<SimpleMatch>
|
||||
{
|
||||
matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
|
||||
|
||||
let mut padded_matches = Vec::with_capacity(matches.len());
|
||||
|
||||
// let before_padding = Instant::now();
|
||||
// for each attribute of each document
|
||||
for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
|
||||
// padding will only be applied
|
||||
// to word indices in the same attribute
|
||||
let mut padding = 0;
|
||||
let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
|
||||
|
||||
// for each match at the same position
|
||||
// in this document attribute
|
||||
while let Some(same_word_index) = iter.next() {
|
||||
// find the biggest padding
|
||||
let mut biggest = 0;
|
||||
for match_ in same_word_index {
|
||||
let mut replacement = query_mapping[&(match_.query_index as usize)].clone();
|
||||
let replacement_len = replacement.len();
|
||||
let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
|
||||
|
||||
if let Some(query_index) = replacement.next() {
|
||||
let word_index = match_.word_index + padding as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
|
||||
// look ahead and if there already is a match
|
||||
// corresponding to this padding word, abort the padding
|
||||
'padding: for (x, next_group) in nexts.enumerate() {
|
||||
for (i, query_index) in replacement.clone().enumerate().skip(x) {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
|
||||
for nmatch_ in next_group {
|
||||
let mut rep = query_mapping[&(nmatch_.query_index as usize)].clone();
|
||||
let query_index = rep.next().unwrap();
|
||||
if query_index == padmatch.query_index {
|
||||
if !found {
|
||||
// if we find a corresponding padding for the
|
||||
// first time, we must push the preceding paddings
|
||||
for (i, query_index) in replacement.clone().enumerate().take(i) {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
biggest = biggest.max(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
padded_matches.push(padmatch);
|
||||
found = true;
|
||||
continue 'padding;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we do not find a corresponding padding in the
|
||||
// next groups, stop here and pad what was found
|
||||
break;
|
||||
}
|
||||
|
||||
if !found {
|
||||
// if no padding was found in the following matches
|
||||
// we must insert the entire padding
|
||||
for (i, query_index) in replacement.enumerate() {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
}
|
||||
|
||||
biggest = biggest.max(replacement_len - 1);
|
||||
}
|
||||
}
|
||||
|
||||
padding += biggest;
|
||||
}
|
||||
}
|
||||
|
||||
// debug!("padding matches took {:.02?}", before_padding.elapsed());
|
||||
|
||||
// With this check we can see that the loop above takes something
|
||||
// like 43% of the search time even when no rewrite is needed.
|
||||
// assert_eq!(before_matches, padded_matches);
|
||||
|
||||
SetBuf::from_dirty(padded_matches)
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::{SimpleMatch};
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
pub struct Proximity;
|
||||
|
||||
impl Criterion for Proximity {
|
||||
fn name(&self) -> &str { "proximity" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
|
||||
if lhs < rhs {
|
||||
cmp::min(rhs - lhs, MAX_DISTANCE)
|
||||
} else {
|
||||
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
|
||||
}
|
||||
}
|
||||
|
||||
fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
|
||||
if lhs.attribute != rhs.attribute { MAX_DISTANCE }
|
||||
else { index_proximity(lhs.word_index, rhs.word_index) }
|
||||
}
|
||||
|
||||
fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
|
||||
let mut min_prox = u16::max_value();
|
||||
for a in lhs {
|
||||
for b in rhs {
|
||||
let prox = attribute_proximity(*a, *b);
|
||||
min_prox = cmp::min(min_prox, prox);
|
||||
}
|
||||
}
|
||||
min_prox
|
||||
}
|
||||
|
||||
fn matches_proximity(matches: &[SimpleMatch]) -> u16 {
|
||||
let mut proximity = 0;
|
||||
let mut iter = matches.linear_group_by_key(|m| m.query_index);
|
||||
|
||||
// iterate over groups by windows of size 2
|
||||
let mut last = iter.next();
|
||||
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
|
||||
proximity += min_proximity(lhs, rhs);
|
||||
last = Some(rhs);
|
||||
}
|
||||
|
||||
proximity
|
||||
}
|
||||
|
||||
let lhs = matches_proximity(&lhs.processed_matches);
|
||||
let rhs = matches_proximity(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
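A minimal, self-contained sketch of the word-distance behaviour above, re-implemented purely for illustration (it mirrors `index_proximity` and assumes the same `MAX_DISTANCE` of 8; the outer function name is made up):

fn index_proximity_sketch() {
    const MAX_DISTANCE: u16 = 8;
    fn index_proximity(lhs: u16, rhs: u16) -> u16 {
        if lhs < rhs {
            std::cmp::min(rhs - lhs, MAX_DISTANCE)
        } else {
            std::cmp::min(lhs - rhs, MAX_DISTANCE) + 1
        }
    }
    assert_eq!(index_proximity(2, 5), 3);  // query words in order, 3 positions apart
    assert_eq!(index_proximity(5, 2), 4);  // reversed order costs one extra point
    assert_eq!(index_proximity(0, 20), 8); // distances are capped at MAX_DISTANCE
}

Matches in different attributes always cost `MAX_DISTANCE`, so documents whose query words are close together and in order sort first.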
|
||||
@@ -1,129 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use crate::{RankedMap, RawDocument};
|
||||
use super::{Criterion, Context};
|
||||
|
||||
/// A helper struct that makes it possible to sort documents by
/// some of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you should check the [`Ord`] implementation of `Option`.
|
||||
///
|
||||
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
|
||||
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use serde_derive::Deserialize;
|
||||
/// use meilisearch::rank::criterion::*;
|
||||
///
|
||||
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
|
||||
///
|
||||
/// let builder = CriteriaBuilder::with_capacity(8)
|
||||
/// .add(Typo)
|
||||
/// .add(Words)
|
||||
/// .add(Proximity)
|
||||
/// .add(Attribute)
|
||||
/// .add(WordsPosition)
|
||||
/// .add(Exactness)
|
||||
/// .add(custom_ranking)
|
||||
/// .add(DocumentId);
|
||||
///
|
||||
/// let criterion = builder.build();
|
||||
///
|
||||
/// ```
|
||||
pub struct SortByAttr<'a> {
|
||||
ranked_map: &'a RankedMap,
|
||||
field_id: FieldId,
|
||||
reversed: bool,
|
||||
}
|
||||
|
||||
impl<'a> SortByAttr<'a> {
|
||||
pub fn lower_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
SortByAttr::new(ranked_map, schema, attr_name, false)
|
||||
}
|
||||
|
||||
pub fn higher_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
SortByAttr::new(ranked_map, schema, attr_name, true)
|
||||
}
|
||||
|
||||
fn new(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
reversed: bool,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
let field_id = match schema.id(attr_name) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(SortByAttrError::AttributeNotFound),
|
||||
};
|
||||
|
||||
if !schema.is_ranked(field_id) {
|
||||
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
|
||||
}
|
||||
|
||||
Ok(SortByAttr {
|
||||
ranked_map,
|
||||
field_id,
|
||||
reversed,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Criterion for SortByAttr<'_> {
|
||||
fn name(&self) -> &str {
|
||||
"sort by attribute"
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = self.ranked_map.get(lhs.id, self.field_id);
|
||||
let rhs = self.ranked_map.get(rhs.id, self.field_id);
|
||||
|
||||
match (lhs, rhs) {
|
||||
(Some(lhs), Some(rhs)) => {
|
||||
let order = lhs.cmp(&rhs);
|
||||
if self.reversed {
|
||||
order.reverse()
|
||||
} else {
|
||||
order
|
||||
}
|
||||
}
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, None) => Ordering::Equal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SortByAttrError {
|
||||
AttributeNotFound,
|
||||
AttributeNotRegisteredForRanking,
|
||||
}
|
||||
|
||||
impl fmt::Display for SortByAttrError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use SortByAttrError::*;
|
||||
match self {
|
||||
AttributeNotFound => f.write_str("attribute not found in the schema"),
|
||||
AttributeNotRegisteredForRanking => f.write_str("attribute not registered for ranking"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SortByAttrError {}
|
||||
@@ -1,56 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Typo;
|
||||
|
||||
impl Criterion for Typo {
|
||||
fn name(&self) -> &str { "typo" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
// This function is an approximate base-10 logarithm (it returns log10(n + 1)).
// It is safe to panic on inputs higher than 3,
// as the number of typos is never bigger than that.
|
||||
#[inline]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn custom_log10(n: u8) -> f32 {
|
||||
match n {
|
||||
0 => 0.0, // log(1)
|
||||
1 => 0.30102, // log(2)
|
||||
2 => 0.47712, // log(3)
|
||||
3 => 0.60205, // log(4)
|
||||
_ => panic!("invalid number"),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn compute_typos(distances: &[Option<u8>]) -> usize {
|
||||
let mut number_words: usize = 0;
|
||||
let mut sum_typos = 0.0;
|
||||
|
||||
for distance in distances {
|
||||
if let Some(distance) = distance {
|
||||
sum_typos += custom_log10(*distance);
|
||||
number_words += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
|
||||
}
|
||||
|
||||
let lhs = compute_typos(&lhs.processed_distances);
|
||||
let rhs = compute_typos(&rhs.processed_distances);
|
||||
|
||||
lhs.cmp(&rhs).reverse()
|
||||
}
|
||||
}
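A small worked example of how the typo score above behaves, assuming the same `custom_log10` table (0 typos map to 0.0 and one typo maps to 0.30102); the helper name is made up for illustration:

fn typo_score_sketch() {
    // two matched query words, one of them containing a single typo
    let number_words = 2.0_f32;
    let sum_typos = 0.0 + 0.30102_f32;
    let score = (number_words / (sum_typos + 1.0) * 1000.0) as usize;
    assert_eq!(score, 1537); // more matched words and fewer typos give a bigger score
}

The criterion then reverses the comparison, so documents with the bigger score rank first.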
|
||||
@@ -1,31 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Words;
|
||||
|
||||
impl Criterion for Words {
|
||||
fn name(&self) -> &str { "words" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn number_of_query_words(distances: &[Option<u8>]) -> usize {
|
||||
distances.iter().cloned().filter(Option::is_some).count()
|
||||
}
|
||||
|
||||
let lhs = number_of_query_words(&lhs.processed_distances);
|
||||
let rhs = number_of_query_words(&rhs.processed_distances);
|
||||
|
||||
lhs.cmp(&rhs).reverse()
|
||||
}
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
pub struct WordsPosition;
|
||||
|
||||
impl Criterion for WordsPosition {
|
||||
fn name(&self) -> &str { "words position" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_words_position(matches: &[SimpleMatch]) -> usize {
|
||||
let mut sum_words_position = 0;
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_words_position += group[0].word_index as usize;
|
||||
}
|
||||
sum_words_position
|
||||
}
|
||||
|
||||
let lhs = sum_words_position(&lhs.processed_matches);
|
||||
let rhs = sum_words_position(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
@@ -1,103 +0,0 @@
|
||||
use hashbrown::HashMap;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub struct DistinctMap<K> {
|
||||
inner: HashMap<K, usize>,
|
||||
limit: usize,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl<K: Hash + Eq> DistinctMap<K> {
|
||||
pub fn new(limit: usize) -> Self {
|
||||
DistinctMap {
|
||||
inner: HashMap::new(),
|
||||
limit,
|
||||
len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BufferedDistinctMap<'a, K> {
|
||||
internal: &'a mut DistinctMap<K>,
|
||||
inner: HashMap<K, usize>,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl<'a, K: Hash + Eq> BufferedDistinctMap<'a, K> {
|
||||
pub fn new(internal: &'a mut DistinctMap<K>) -> BufferedDistinctMap<'a, K> {
|
||||
BufferedDistinctMap {
|
||||
internal,
|
||||
inner: HashMap::new(),
|
||||
len: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&mut self, key: K) -> bool {
|
||||
let internal_seen = self.internal.inner.get(&key).unwrap_or(&0);
|
||||
let inner_seen = self.inner.entry(key).or_insert(0);
|
||||
let seen = *internal_seen + *inner_seen;
|
||||
|
||||
if seen < self.internal.limit {
|
||||
*inner_seen += 1;
|
||||
self.len += 1;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_without_key(&mut self) -> bool {
|
||||
self.len += 1;
|
||||
true
|
||||
}
|
||||
|
||||
pub fn transfert_to_internal(&mut self) {
|
||||
for (k, v) in self.inner.drain() {
|
||||
let value = self.internal.inner.entry(k).or_insert(0);
|
||||
*value += v;
|
||||
}
|
||||
|
||||
self.internal.len += self.len;
|
||||
self.len = 0;
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.internal.len() + self.len
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn easy_distinct_map() {
|
||||
let mut map = DistinctMap::new(2);
|
||||
let mut buffered = BufferedDistinctMap::new(&mut map);
|
||||
|
||||
for x in &[1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6] {
|
||||
buffered.register(x);
|
||||
}
|
||||
buffered.transfert_to_internal();
|
||||
assert_eq!(map.len(), 8);
|
||||
|
||||
let mut map = DistinctMap::new(2);
|
||||
let mut buffered = BufferedDistinctMap::new(&mut map);
|
||||
assert_eq!(buffered.register(1), true);
|
||||
assert_eq!(buffered.register(1), true);
|
||||
assert_eq!(buffered.register(1), false);
|
||||
assert_eq!(buffered.register(1), false);
|
||||
|
||||
assert_eq!(buffered.register(2), true);
|
||||
assert_eq!(buffered.register(3), true);
|
||||
assert_eq!(buffered.register(2), true);
|
||||
assert_eq!(buffered.register(2), false);
|
||||
|
||||
buffered.transfert_to_internal();
|
||||
assert_eq!(map.len(), 5);
|
||||
}
|
||||
}
|
||||
@@ -1,224 +0,0 @@
|
||||
use crate::serde::{DeserializerError, SerializerError};
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
use pest::error::Error as PestError;
|
||||
use crate::filters::Rule;
|
||||
use std::{error, fmt, io};
|
||||
|
||||
pub use bincode::Error as BincodeError;
|
||||
pub use fst::Error as FstError;
|
||||
pub use heed::Error as HeedError;
|
||||
pub use pest::error as pest_error;
|
||||
|
||||
use meilisearch_error::{ErrorCode, Code};
|
||||
|
||||
pub type MResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Bincode(bincode::Error),
|
||||
Deserializer(DeserializerError),
|
||||
FacetError(FacetError),
|
||||
FilterParseError(PestError<Rule>),
|
||||
Fst(fst::Error),
|
||||
Heed(heed::Error),
|
||||
IndexAlreadyExists,
|
||||
Io(io::Error),
|
||||
MaxFieldsLimitExceeded,
|
||||
MissingDocumentId,
|
||||
MissingPrimaryKey,
|
||||
Schema(meilisearch_schema::Error),
|
||||
SchemaMissing,
|
||||
SerdeJson(SerdeJsonError),
|
||||
Serializer(SerializerError),
|
||||
VersionMismatch(String),
|
||||
WordIndexMissing,
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
fn error_code(&self) -> Code {
|
||||
use Error::*;
|
||||
|
||||
match self {
|
||||
FacetError(_) => Code::Facet,
|
||||
FilterParseError(_) => Code::Filter,
|
||||
IndexAlreadyExists => Code::IndexAlreadyExists,
|
||||
MissingPrimaryKey => Code::MissingPrimaryKey,
|
||||
MissingDocumentId => Code::MissingDocumentId,
|
||||
MaxFieldsLimitExceeded => Code::MaxFieldsLimitExceeded,
|
||||
Schema(s) => s.error_code(),
|
||||
WordIndexMissing
|
||||
| SchemaMissing => Code::InvalidState,
|
||||
Heed(_)
|
||||
| Fst(_)
|
||||
| SerdeJson(_)
|
||||
| Bincode(_)
|
||||
| Serializer(_)
|
||||
| Deserializer(_)
|
||||
| VersionMismatch(_)
|
||||
| Io(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(error: io::Error) -> Error {
|
||||
Error::Io(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PestError<Rule>> for Error {
|
||||
fn from(error: PestError<Rule>) -> Error {
|
||||
Error::FilterParseError(error.renamed_rules(|r| {
|
||||
let s = match r {
|
||||
Rule::or => "OR",
|
||||
Rule::and => "AND",
|
||||
Rule::not => "NOT",
|
||||
Rule::string => "string",
|
||||
Rule::word => "word",
|
||||
Rule::greater => "field > value",
|
||||
Rule::less => "field < value",
|
||||
Rule::eq => "field = value",
|
||||
Rule::leq => "field <= value",
|
||||
Rule::geq => "field >= value",
|
||||
Rule::key => "key",
|
||||
_ => "other",
|
||||
};
|
||||
s.to_string()
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FacetError> for Error {
|
||||
fn from(error: FacetError) -> Error {
|
||||
Error::FacetError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_schema::Error> for Error {
|
||||
fn from(error: meilisearch_schema::Error) -> Error {
|
||||
Error::Schema(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HeedError> for Error {
|
||||
fn from(error: HeedError) -> Error {
|
||||
Error::Heed(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FstError> for Error {
|
||||
fn from(error: FstError) -> Error {
|
||||
Error::Fst(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerdeJsonError> for Error {
|
||||
fn from(error: SerdeJsonError) -> Error {
|
||||
Error::SerdeJson(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BincodeError> for Error {
|
||||
fn from(error: BincodeError) -> Error {
|
||||
Error::Bincode(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerializerError> for Error {
|
||||
fn from(error: SerializerError) -> Error {
|
||||
match error {
|
||||
SerializerError::DocumentIdNotFound => Error::MissingDocumentId,
|
||||
e => Error::Serializer(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DeserializerError> for Error {
|
||||
fn from(error: DeserializerError) -> Error {
|
||||
Error::Deserializer(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::Error::*;
|
||||
match self {
|
||||
Bincode(e) => write!(f, "bincode error; {}", e),
|
||||
Deserializer(e) => write!(f, "deserializer error; {}", e),
|
||||
FacetError(e) => write!(f, "error processing facet filter: {}", e),
|
||||
FilterParseError(e) => write!(f, "error parsing filter; {}", e),
|
||||
Fst(e) => write!(f, "fst error; {}", e),
|
||||
Heed(e) => write!(f, "heed error; {}", e),
|
||||
IndexAlreadyExists => write!(f, "index already exists"),
|
||||
Io(e) => write!(f, "{}", e),
|
||||
MaxFieldsLimitExceeded => write!(f, "maximum number of fields in a document exceeded"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
MissingPrimaryKey => write!(f, "schema cannot be built without a primary key"),
|
||||
Schema(e) => write!(f, "schema error; {}", e),
|
||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
||||
Serializer(e) => write!(f, "serializer error; {}", e),
|
||||
VersionMismatch(version) => write!(f, "Cannot open database, expected MeiliSearch engine version: {}, current engine version: {}.{}.{}",
|
||||
version,
|
||||
env!("CARGO_PKG_VERSION_MAJOR"),
|
||||
env!("CARGO_PKG_VERSION_MINOR"),
|
||||
env!("CARGO_PKG_VERSION_PATCH")),
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for Error {}
|
||||
|
||||
struct FilterParseError(PestError<Rule>);
|
||||
|
||||
impl fmt::Display for FilterParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use crate::pest_error::LineColLocation::*;
|
||||
|
||||
let (line, column) = match self.0.line_col {
|
||||
Span((line, _), (column, _)) => (line, column),
|
||||
Pos((line, column)) => (line, column),
|
||||
};
|
||||
write!(f, "parsing error on line {} at column {}: {}", line, column, self.0.variant.message())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FacetError {
|
||||
EmptyArray,
|
||||
ParsingError(String),
|
||||
UnexpectedToken { expected: &'static [&'static str], found: String },
|
||||
InvalidFormat(String),
|
||||
AttributeNotFound(String),
|
||||
AttributeNotSet { expected: Vec<String>, found: String },
|
||||
InvalidDocumentAttribute(String),
|
||||
NoAttributesForFaceting,
|
||||
}
|
||||
|
||||
impl FacetError {
|
||||
pub fn unexpected_token(expected: &'static [&'static str], found: impl ToString) -> FacetError {
|
||||
FacetError::UnexpectedToken{ expected, found: found.to_string() }
|
||||
}
|
||||
|
||||
pub fn attribute_not_set(expected: Vec<String>, found: impl ToString) -> FacetError {
|
||||
FacetError::AttributeNotSet{ expected, found: found.to_string() }
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FacetError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use FacetError::*;
|
||||
|
||||
match self {
|
||||
EmptyArray => write!(f, "empty array in facet filter is unspecified behavior"),
|
||||
ParsingError(msg) => write!(f, "parsing error: {}", msg),
|
||||
UnexpectedToken { expected, found } => write!(f, "unexpected token {}, expected {}", found, expected.join(" or ")),
|
||||
InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found),
|
||||
AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr),
|
||||
AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")),
|
||||
InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr),
|
||||
NoAttributesForFaceting => write!(f, "impossible to perform faceted search, no attributes for faceting are set"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,357 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::ops::Deref;
|
||||
|
||||
use cow_utils::CowUtils;
|
||||
use either::Either;
|
||||
use heed::types::{Str, OwnedType};
|
||||
use indexmap::IndexMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use meilisearch_types::DocumentId;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::error::{FacetError, MResult};
|
||||
use crate::store::BEU16;
|
||||
|
||||
/// Data structure used to represent a boolean expression in the form of nested arrays.
|
||||
/// Values in the outer array are and-ed together, values in the inner arrays are or-ed together.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct FacetFilter(Vec<Either<Vec<FacetKey>, FacetKey>>);
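A minimal usage sketch of the nested-array semantics described above, modeled on the tests at the end of this file; the `genre` attribute, the values, and the helper name are made up for illustration:

fn facet_filter_sketch() {
    use either::Either::{Left, Right};

    let mut schema = Schema::new();
    let genre = schema.insert_and_index("genre").unwrap();
    let faceted = [genre];

    let filter = FacetFilter::from_str(
        r#"[["genre:horror", "genre:comedy"], "genre:french"]"#,
        &schema,
        &faceted,
    ).unwrap();

    // the outer array is an AND, the inner arrays are ORs
    assert_eq!(filter, FacetFilter(vec![
        Left(vec![
            FacetKey::new(genre, "horror".to_string()),
            FacetKey::new(genre, "comedy".to_string()),
        ]),
        Right(FacetKey::new(genre, "french".to_string())),
    ]));
    // i.e. documents must match (genre:horror OR genre:comedy) AND genre:french
}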
|
||||
|
||||
impl Deref for FacetFilter {
|
||||
type Target = Vec<Either<Vec<FacetKey>, FacetKey>>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetFilter {
|
||||
pub fn from_str(
|
||||
s: &str,
|
||||
schema: &Schema,
|
||||
attributes_for_faceting: &[FieldId],
|
||||
) -> MResult<FacetFilter> {
|
||||
if attributes_for_faceting.is_empty() {
|
||||
return Err(FacetError::NoAttributesForFaceting.into());
|
||||
}
|
||||
let parsed = serde_json::from_str::<Value>(s).map_err(|e| FacetError::ParsingError(e.to_string()))?;
|
||||
let mut filter = Vec::new();
|
||||
match parsed {
|
||||
Value::Array(and_exprs) => {
|
||||
if and_exprs.is_empty() {
|
||||
return Err(FacetError::EmptyArray.into());
|
||||
}
|
||||
for expr in and_exprs {
|
||||
match expr {
|
||||
Value::String(s) => {
|
||||
let key = FacetKey::from_str( &s, schema, attributes_for_faceting)?;
|
||||
filter.push(Either::Right(key));
|
||||
}
|
||||
Value::Array(or_exprs) => {
|
||||
if or_exprs.is_empty() {
|
||||
return Err(FacetError::EmptyArray.into());
|
||||
}
|
||||
let mut inner = Vec::new();
|
||||
for expr in or_exprs {
|
||||
match expr {
|
||||
Value::String(s) => {
|
||||
let key = FacetKey::from_str( &s, schema, attributes_for_faceting)?;
|
||||
inner.push(key);
|
||||
}
|
||||
bad_value => return Err(FacetError::unexpected_token(&["String"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
filter.push(Either::Left(inner));
|
||||
}
|
||||
bad_value => return Err(FacetError::unexpected_token(&["Array", "String"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
Ok(Self(filter))
|
||||
}
|
||||
bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Hash)]
|
||||
#[repr(C)]
|
||||
pub struct FacetKey(FieldId, String);
|
||||
|
||||
impl FacetKey {
|
||||
pub fn new(field_id: FieldId, value: String) -> Self {
|
||||
let value = match value.cow_to_lowercase() {
|
||||
Cow::Borrowed(_) => value,
|
||||
Cow::Owned(s) => s,
|
||||
};
|
||||
Self(field_id, value)
|
||||
}
|
||||
|
||||
pub fn key(&self) -> FieldId {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &str {
|
||||
&self.1
|
||||
}
|
||||
|
||||
// TODO improve parser
|
||||
fn from_str(
|
||||
s: &str,
|
||||
schema: &Schema,
|
||||
attributes_for_faceting: &[FieldId],
|
||||
) -> Result<Self, FacetError> {
|
||||
let mut split = s.splitn(2, ':');
|
||||
let key = split
|
||||
.next()
|
||||
.ok_or_else(|| FacetError::InvalidFormat(s.to_string()))?
|
||||
.trim();
|
||||
let field_id = schema
|
||||
.id(key)
|
||||
.ok_or_else(|| FacetError::AttributeNotFound(key.to_string()))?;
|
||||
|
||||
if !attributes_for_faceting.contains(&field_id) {
|
||||
return Err(FacetError::attribute_not_set(
|
||||
attributes_for_faceting
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id))
|
||||
.map(str::to_string)
|
||||
.collect::<Vec<_>>(),
|
||||
key))
|
||||
}
|
||||
let value = split
|
||||
.next()
|
||||
.ok_or_else(|| FacetError::InvalidFormat(s.to_string()))?
|
||||
.trim();
|
||||
// unquoting the string if need be:
|
||||
let mut indices = value.char_indices();
|
||||
let value = match (indices.next(), indices.last()) {
|
||||
(Some((s, '\'')), Some((e, '\''))) |
|
||||
(Some((s, '\"')), Some((e, '\"'))) => value[s + 1..e].to_string(),
|
||||
_ => value.to_string(),
|
||||
};
|
||||
Ok(Self::new(field_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetKey {
|
||||
type EItem = FacetKey;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut buffer = Vec::with_capacity(2 + item.1.len());
|
||||
let id = BEU16::new(item.key().into());
|
||||
let id_bytes = OwnedType::bytes_encode(&id)?;
|
||||
let value_bytes = Str::bytes_encode(item.value())?;
|
||||
buffer.extend_from_slice(id_bytes.as_ref());
|
||||
buffer.extend_from_slice(value_bytes.as_ref());
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetKey {
|
||||
type DItem = FacetKey;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (id_bytes, value_bytes) = bytes.split_at(2);
|
||||
let id = OwnedType::<BEU16>::bytes_decode(id_bytes)?;
|
||||
let id = id.get().into();
|
||||
let string = Str::bytes_decode(&value_bytes)?;
|
||||
Some(FacetKey(id, string.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_to_facet_map(
|
||||
facet_map: &mut HashMap<FacetKey, (String, Vec<DocumentId>)>,
|
||||
field_id: FieldId,
|
||||
value: Value,
|
||||
document_id: DocumentId,
|
||||
) -> Result<(), FacetError> {
|
||||
let value = match value {
|
||||
Value::String(s) => s,
|
||||
// ignore null
|
||||
Value::Null => return Ok(()),
|
||||
value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())),
|
||||
};
|
||||
let key = FacetKey::new(field_id, value.clone());
|
||||
facet_map.entry(key).or_insert_with(|| (value, Vec::new())).1.push(document_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn facet_map_from_docids(
|
||||
rtxn: &heed::RoTxn<MainT>,
|
||||
index: &crate::Index,
|
||||
document_ids: &[DocumentId],
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
// A hashmap that associates a facet key with a pair containing the original facet attribute
// string with its case preserved, and a list of document ids for that facet attribute.
|
||||
let mut facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)> = HashMap::new();
|
||||
for document_id in document_ids {
|
||||
for result in index
|
||||
.documents_fields
|
||||
.document_fields(rtxn, *document_id)?
|
||||
{
|
||||
let (field_id, bytes) = result?;
|
||||
if attributes_for_facetting.contains(&field_id) {
|
||||
match serde_json::from_slice(bytes)? {
|
||||
Value::Array(values) => {
|
||||
for v in values {
|
||||
add_to_facet_map(&mut facet_map, field_id, v, *document_id)?;
|
||||
}
|
||||
}
|
||||
v => add_to_facet_map(&mut facet_map, field_id, v, *document_id)?,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(facet_map)
|
||||
}
|
||||
|
||||
pub fn facet_map_from_docs(
|
||||
schema: &Schema,
|
||||
documents: &HashMap<DocumentId, IndexMap<String, Value>>,
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
let mut facet_map = HashMap::new();
|
||||
let attributes_for_facetting = attributes_for_facetting
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id).map(|name| (id, name)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for (id, document) in documents {
|
||||
for (field_id, name) in &attributes_for_facetting {
|
||||
if let Some(value) = document.get(*name) {
|
||||
match value {
|
||||
Value::Array(values) => {
|
||||
for v in values {
|
||||
add_to_facet_map(&mut facet_map, *field_id, v.clone(), *id)?;
|
||||
}
|
||||
}
|
||||
v => add_to_facet_map(&mut facet_map, *field_id, v.clone(), *id)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(facet_map)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use meilisearch_schema::Schema;
|
||||
|
||||
#[test]
|
||||
fn test_facet_key() {
|
||||
let mut schema = Schema::new();
|
||||
let id = schema.insert_and_index("hello").unwrap();
|
||||
let facet_list = [schema.id("hello").unwrap()];
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:12", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "12".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:\"foo bar\"", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "foo bar".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:'foo bar'", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "foo bar".to_string())
|
||||
);
|
||||
// weird case
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:blabla:machin", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "blabla:machin".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:\"\"", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:'", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "'".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:''", &schema, &facet_list).unwrap(),
|
||||
FacetKey::new(id, "".to_string())
|
||||
);
|
||||
assert!(FacetKey::from_str("hello", &schema, &facet_list).is_err());
|
||||
assert!(FacetKey::from_str("toto:12", &schema, &facet_list).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_facet_array() {
|
||||
use either::Either::{Left, Right};
|
||||
let mut schema = Schema::new();
|
||||
let _id = schema.insert_and_index("hello").unwrap();
|
||||
let facet_list = [schema.id("hello").unwrap()];
|
||||
assert_eq!(
|
||||
FacetFilter::from_str("[[\"hello:12\"]]", &schema, &facet_list).unwrap(),
|
||||
FacetFilter(vec![Left(vec![FacetKey(FieldId(0), "12".to_string())])])
|
||||
);
|
||||
assert_eq!(
|
||||
FacetFilter::from_str("[\"hello:12\"]", &schema, &facet_list).unwrap(),
|
||||
FacetFilter(vec![Right(FacetKey(FieldId(0), "12".to_string()))])
|
||||
);
|
||||
assert_eq!(
|
||||
FacetFilter::from_str("[\"hello:12\", \"hello:13\"]", &schema, &facet_list).unwrap(),
|
||||
FacetFilter(vec![
|
||||
Right(FacetKey(FieldId(0), "12".to_string())),
|
||||
Right(FacetKey(FieldId(0), "13".to_string()))
|
||||
])
|
||||
);
|
||||
assert_eq!(
|
||||
FacetFilter::from_str("[[\"hello:12\", \"hello:13\"]]", &schema, &facet_list).unwrap(),
|
||||
FacetFilter(vec![Left(vec![
|
||||
FacetKey(FieldId(0), "12".to_string()),
|
||||
FacetKey(FieldId(0), "13".to_string())
|
||||
])])
|
||||
);
|
||||
assert_eq!(
|
||||
FacetFilter::from_str(
|
||||
"[[\"hello:12\", \"hello:13\"], \"hello:14\"]",
|
||||
&schema,
|
||||
&facet_list
|
||||
)
|
||||
.unwrap(),
|
||||
FacetFilter(vec![
|
||||
Left(vec![
|
||||
FacetKey(FieldId(0), "12".to_string()),
|
||||
FacetKey(FieldId(0), "13".to_string())
|
||||
]),
|
||||
Right(FacetKey(FieldId(0), "14".to_string()))
|
||||
])
|
||||
);
|
||||
|
||||
// invalid array depths
|
||||
assert!(FacetFilter::from_str(
|
||||
"[[[\"hello:12\", \"hello:13\"], \"hello:14\"]]",
|
||||
&schema,
|
||||
&facet_list
|
||||
)
|
||||
.is_err());
|
||||
assert!(FacetFilter::from_str(
|
||||
"[[[\"hello:12\", \"hello:13\"]], \"hello:14\"]]",
|
||||
&schema,
|
||||
&facet_list
|
||||
)
|
||||
.is_err());
|
||||
assert!(FacetFilter::from_str("\"hello:14\"", &schema, &facet_list).is_err());
|
||||
|
||||
// unexisting key
|
||||
assert!(FacetFilter::from_str("[\"foo:12\"]", &schema, &facet_list).is_err());
|
||||
|
||||
// invalid facet key
|
||||
assert!(FacetFilter::from_str("[\"foo=12\"]", &schema, &facet_list).is_err());
|
||||
assert!(FacetFilter::from_str("[\"foo12\"]", &schema, &facet_list).is_err());
|
||||
assert!(FacetFilter::from_str("[\"\"]", &schema, &facet_list).is_err());
|
||||
|
||||
// empty array error
|
||||
assert!(FacetFilter::from_str("[]", &schema, &facet_list).is_err());
|
||||
assert!(FacetFilter::from_str("[\"hello:12\", []]", &schema, &facet_list).is_err());
|
||||
}
|
||||
}
|
||||
@@ -1,276 +0,0 @@
|
||||
use std::str::FromStr;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::{store::Index, DocumentId, MainT};
|
||||
use heed::RoTxn;
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use pest::error::{Error as PestError, ErrorVariant};
|
||||
use pest::iterators::Pair;
|
||||
use serde_json::{Value, Number};
|
||||
use super::parser::Rule;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum ConditionType {
|
||||
Greater,
|
||||
Less,
|
||||
Equal,
|
||||
LessEqual,
|
||||
GreaterEqual,
|
||||
NotEqual,
|
||||
}
|
||||
|
||||
/// We need to infer the type when the filter is constructed
/// and match every possible type it can be parsed into.
|
||||
#[derive(Debug)]
|
||||
struct ConditionValue<'a> {
|
||||
string: &'a str,
|
||||
boolean: Option<bool>,
|
||||
number: Option<Number>
|
||||
}
|
||||
|
||||
impl<'a> ConditionValue<'a> {
|
||||
pub fn new(value: &Pair<'a, Rule>) -> Self {
|
||||
match value.as_rule() {
|
||||
Rule::string | Rule::word => {
|
||||
let string = value.as_str();
|
||||
let boolean = match value.as_str() {
|
||||
"true" => Some(true),
|
||||
"false" => Some(false),
|
||||
_ => None,
|
||||
};
|
||||
let number = Number::from_str(value.as_str()).ok();
|
||||
ConditionValue { string, boolean, number }
|
||||
},
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
self.string
|
||||
}
|
||||
|
||||
pub fn as_number(&self) -> Option<&Number> {
|
||||
self.number.as_ref()
|
||||
}
|
||||
|
||||
pub fn as_bool(&self) -> Option<bool> {
|
||||
self.boolean
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Condition<'a> {
|
||||
field: FieldId,
|
||||
condition: ConditionType,
|
||||
value: ConditionValue<'a>
|
||||
}
|
||||
|
||||
fn get_field_value<'a>(schema: &Schema, pair: Pair<'a, Rule>) -> Result<(FieldId, ConditionValue<'a>), Error> {
|
||||
let mut items = pair.into_inner();
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
let field = schema
|
||||
.id(key.as_str())
|
||||
.ok_or_else(|| PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
schema.names().collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span()))?;
|
||||
let value = ConditionValue::new(&items.next().unwrap());
|
||||
Ok((field, value))
|
||||
}
|
||||
|
||||
// note: comparing very large numbers may be imprecise because of the f64 fallback
|
||||
fn compare_numbers(lhs: &Number, rhs: &Number) -> Option<Ordering> {
|
||||
match (lhs.as_i64(), lhs.as_u64(), lhs.as_f64(),
|
||||
rhs.as_i64(), rhs.as_u64(), rhs.as_f64()) {
|
||||
// i64 u64 f64 i64 u64 f64
|
||||
(Some(lhs), _, _, Some(rhs), _, _) => lhs.partial_cmp(&rhs),
|
||||
(_, Some(lhs), _, _, Some(rhs), _) => lhs.partial_cmp(&rhs),
|
||||
(_, _, Some(lhs), _, _, Some(rhs)) => lhs.partial_cmp(&rhs),
|
||||
(_, _, _, _, _, _) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Condition<'a> {
|
||||
pub fn less(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Less;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn greater(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Greater;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn neq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::NotEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn geq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::GreaterEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn leq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::LessEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn eq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Equal;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn test(
|
||||
&self,
|
||||
reader: &RoTxn<MainT>,
|
||||
index: &Index,
|
||||
document_id: DocumentId,
|
||||
) -> Result<bool, Error> {
|
||||
match index.document_attribute::<Value>(reader, document_id, self.field)? {
|
||||
Some(Value::Array(values)) => Ok(values.iter().any(|v| self.match_value(Some(v)))),
|
||||
other => Ok(self.match_value(other.as_ref())),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_value(&self, value: Option<&Value>) -> bool {
|
||||
match value {
|
||||
Some(Value::String(s)) => {
|
||||
let value = self.value.as_str();
|
||||
match self.condition {
|
||||
ConditionType::Equal => unicase::eq(value, &s),
|
||||
ConditionType::NotEqual => !unicase::eq(value, &s),
|
||||
_ => false
|
||||
}
|
||||
},
|
||||
Some(Value::Number(n)) => {
|
||||
if let Some(value) = self.value.as_number() {
|
||||
if let Some(ord) = compare_numbers(&n, value) {
|
||||
let res = match self.condition {
|
||||
ConditionType::Equal => ord == Ordering::Equal,
|
||||
ConditionType::NotEqual => ord != Ordering::Equal,
|
||||
ConditionType::GreaterEqual => ord != Ordering::Less,
|
||||
ConditionType::LessEqual => ord != Ordering::Greater,
|
||||
ConditionType::Greater => ord == Ordering::Greater,
|
||||
ConditionType::Less => ord == Ordering::Less,
|
||||
};
|
||||
return res
|
||||
}
|
||||
}
|
||||
false
|
||||
},
|
||||
Some(Value::Bool(b)) => {
|
||||
if let Some(value) = self.value.as_bool() {
|
||||
let res = match self.condition {
|
||||
ConditionType::Equal => *b == value,
|
||||
ConditionType::NotEqual => *b != value,
|
||||
_ => false
|
||||
};
|
||||
return res
|
||||
}
|
||||
false
|
||||
},
|
||||
// if field is not supported (or not found), all values are different from it,
|
||||
// so != should always return true in this case.
|
||||
_ => self.condition == ConditionType::NotEqual,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use serde_json::Number;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
#[test]
|
||||
fn test_number_comp() {
|
||||
// test both u64
|
||||
let n1 = Number::from(1u64);
|
||||
let n2 = Number::from(2u64);
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
|
||||
let n1 = Number::from(1u64);
|
||||
let n2 = Number::from(1u64);
|
||||
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
|
||||
|
||||
// test both i64
|
||||
let n1 = Number::from(1i64);
|
||||
let n2 = Number::from(2i64);
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
|
||||
let n1 = Number::from(1i64);
|
||||
let n2 = Number::from(1i64);
|
||||
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
|
||||
|
||||
// test both f64
|
||||
let n1 = Number::from_f64(1f64).unwrap();
|
||||
let n2 = Number::from_f64(2f64).unwrap();
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
|
||||
let n1 = Number::from_f64(1f64).unwrap();
|
||||
let n2 = Number::from_f64(1f64).unwrap();
|
||||
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
|
||||
|
||||
// test one u64 and one f64
|
||||
let n1 = Number::from_f64(1f64).unwrap();
|
||||
let n2 = Number::from(2u64);
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
|
||||
|
||||
// equality
|
||||
let n1 = Number::from_f64(1f64).unwrap();
|
||||
let n2 = Number::from(1u64);
|
||||
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Equal), compare_numbers(&n2, &n1));
|
||||
|
||||
// float is neg
|
||||
let n1 = Number::from_f64(-1f64).unwrap();
|
||||
let n2 = Number::from(1u64);
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
|
||||
|
||||
// float is too big
|
||||
let n1 = Number::from_f64(std::f64::MAX).unwrap();
|
||||
let n2 = Number::from(1u64);
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n2, &n1));
|
||||
|
||||
// misc
|
||||
let n1 = Number::from_f64(std::f64::MAX).unwrap();
|
||||
let n2 = Number::from(std::u64::MAX);
|
||||
assert_eq!(Some(Ordering::Greater), compare_numbers(&n1, &n2));
|
||||
assert_eq!(Some(Ordering::Less), compare_numbers(&n2, &n1));
|
||||
}
|
||||
}
|
||||
@@ -1,127 +0,0 @@
|
||||
mod parser;
|
||||
mod condition;
|
||||
|
||||
pub(crate) use parser::Rule;
|
||||
|
||||
use std::ops::Not;
|
||||
|
||||
use condition::Condition;
|
||||
use crate::error::Error;
|
||||
use crate::{DocumentId, MainT, store::Index};
|
||||
use heed::RoTxn;
|
||||
use meilisearch_schema::Schema;
|
||||
use parser::{PREC_CLIMBER, FilterParser};
|
||||
use pest::iterators::{Pair, Pairs};
|
||||
use pest::Parser;
|
||||
|
||||
type FilterResult<'a> = Result<Filter<'a>, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Filter<'a> {
|
||||
Condition(Condition<'a>),
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
Not(Box<Self>),
|
||||
}
|
||||
|
||||
impl<'a> Filter<'a> {
|
||||
pub fn parse(expr: &'a str, schema: &'a Schema) -> FilterResult<'a> {
|
||||
let mut lexed = FilterParser::parse(Rule::prgm, expr)?;
|
||||
Self::build(lexed.next().unwrap().into_inner(), schema)
|
||||
}
|
||||
|
||||
pub fn test(
|
||||
&self,
|
||||
reader: &RoTxn<MainT>,
|
||||
index: &Index,
|
||||
document_id: DocumentId,
|
||||
) -> Result<bool, Error> {
|
||||
use Filter::*;
|
||||
match self {
|
||||
Condition(c) => c.test(reader, index, document_id),
|
||||
Or(lhs, rhs) => Ok(
|
||||
lhs.test(reader, index, document_id)? || rhs.test(reader, index, document_id)?
|
||||
),
|
||||
And(lhs, rhs) => Ok(
|
||||
lhs.test(reader, index, document_id)? && rhs.test(reader, index, document_id)?
|
||||
),
|
||||
Not(op) => op.test(reader, index, document_id).map(bool::not),
|
||||
}
|
||||
}
|
||||
|
||||
fn build(expression: Pairs<'a, Rule>, schema: &'a Schema) -> FilterResult<'a> {
|
||||
PREC_CLIMBER.climb(
|
||||
expression,
|
||||
|pair: Pair<Rule>| match pair.as_rule() {
|
||||
Rule::eq => Ok(Filter::Condition(Condition::eq(pair, schema)?)),
|
||||
Rule::greater => Ok(Filter::Condition(Condition::greater(pair, schema)?)),
|
||||
Rule::less => Ok(Filter::Condition(Condition::less(pair, schema)?)),
|
||||
Rule::neq => Ok(Filter::Condition(Condition::neq(pair, schema)?)),
|
||||
Rule::geq => Ok(Filter::Condition(Condition::geq(pair, schema)?)),
|
||||
Rule::leq => Ok(Filter::Condition(Condition::leq(pair, schema)?)),
|
||||
Rule::prgm => Self::build(pair.into_inner(), schema),
|
||||
Rule::term => Self::build(pair.into_inner(), schema),
|
||||
Rule::not => Ok(Filter::Not(Box::new(Self::build(
|
||||
pair.into_inner(),
|
||||
schema,
|
||||
)?))),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|lhs: FilterResult, op: Pair<Rule>, rhs: FilterResult| match op.as_rule() {
|
||||
Rule::or => Ok(Filter::Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(Filter::And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn invalid_syntax() {
|
||||
assert!(FilterParser::parse(Rule::prgm, "field : id").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "field=hello hello").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "field=hello OR OR").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "OR field:hello").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field="hello world"#).is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field='hello world"#).is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "NOT field=").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "N").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "(field=1").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "(field=1))").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "field=1ORfield=2").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "field=1 ( OR field=2)").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "hello world=1").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, "").is_err());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"((((((hello=world)))))"#).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_syntax() {
|
||||
assert!(FilterParser::parse(Rule::prgm, "field = id").is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, "field=id").is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field >= 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field <= 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field="hello world""#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field='hello world'"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field > 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field < 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field < 10 AND NOT field=5"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field < 10 AND NOT field > 7.5"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field=true OR NOT field=5"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"NOT field=true OR NOT field=5"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field='hello world' OR ( NOT field=true OR NOT field=5 )"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field='hello \'worl\'d' OR ( NOT field=true OR NOT field=5 )"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"field="hello \"worl\"d" OR ( NOT field=true OR NOT field=5 )"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"((((((hello=world))))))"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#""foo bar" > 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#""foo bar" = 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"'foo bar' = 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"'foo bar' <= 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"'foo bar' != 10"#).is_ok());
|
||||
assert!(FilterParser::parse(Rule::prgm, r#"bar != 10"#).is_ok());
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
key = _{quoted | word}
value = _{quoted | word}
quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
string = {char*}
word = ${(LETTER | NUMBER | "_" | "-" | ".")+}

char = _{ !(PEEK | "\\") ~ ANY
    | "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}

condition = _{eq | greater | less | geq | leq | neq}
geq = {key ~ ">=" ~ value}
leq = {key ~ "<=" ~ value}
neq = {key ~ "!=" ~ value}
eq = {key ~ "=" ~ value}
greater = {key ~ ">" ~ value}
less = {key ~ "<" ~ value}

prgm = {SOI ~ expr ~ EOI}
expr = _{ ( term ~ (operation ~ term)* ) }
term = { ("(" ~ expr ~ ")") | condition | not }
operation = _{ and | or }
and = {"AND"}
or = {"OR"}

not = {"NOT" ~ term}

WHITESPACE = _{ " " }
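A hedged example of the kind of expression this grammar accepts (the field names and values are made up; the `valid_syntax` test earlier in this diff shows more variants):

release_date >= 1577836800 AND (genre = horror OR genre = comedy) AND NOT title = 'the thing'

Here `prgm` wraps a chain of `term`s joined by AND/OR, and each `term` is either a parenthesized sub-expression, a single comparison, or a NOT applied to another term.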
@@ -1,12 +0,0 @@
use once_cell::sync::Lazy;
use pest::prec_climber::{Operator, Assoc, PrecClimber};

pub static PREC_CLIMBER: Lazy<PrecClimber<Rule>> = Lazy::new(|| {
    use Assoc::*;
    use Rule::*;
    pest::prec_climber::PrecClimber::new(vec![Operator::new(or, Left), Operator::new(and, Left)])
});

#[derive(Parser)]
#[grammar = "filters/parser/grammar.pest"]
pub struct FilterParser;
@@ -1,134 +0,0 @@
|
||||
use std::cmp::min;
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
// A simple wrapper around Vec so we get a contiguous buffer that can be indexed like a 2D array.
|
||||
struct N2Array<T> {
|
||||
y_size: usize,
|
||||
buf: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T: Clone> N2Array<T> {
|
||||
fn new(x: usize, y: usize, value: T) -> N2Array<T> {
|
||||
N2Array {
|
||||
y_size: y,
|
||||
buf: vec![value; x * y],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Index<(usize, usize)> for N2Array<T> {
|
||||
type Output = T;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, (x, y): (usize, usize)) -> &T {
|
||||
&self.buf[(x * self.y_size) + y]
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IndexMut<(usize, usize)> for N2Array<T> {
|
||||
#[inline]
|
||||
fn index_mut(&mut self, (x, y): (usize, usize)) -> &mut T {
|
||||
&mut self.buf[(x * self.y_size) + y]
|
||||
}
|
||||
}
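A minimal sketch of the flat layout used above; the helper name is made up, and it peeks at the private `buf` field purely for illustration:

fn n2array_layout_sketch() {
    // N2Array::new(x, y, v) stores element (i, j) at buf[i * y + j],
    // so each row of length y is contiguous in the flat buffer
    let mut grid = N2Array::new(2, 3, 0);
    grid[(1, 2)] = 7;
    assert_eq!(grid.buf[1 * 3 + 2], 7);
}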
|
||||
|
||||
pub fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
|
||||
let (n, m) = (source.len(), target.len());
|
||||
|
||||
assert!(
|
||||
n <= m,
|
||||
"the source string must be shorter than the target one"
|
||||
);
|
||||
|
||||
if n == 0 {
|
||||
return (m as u32, 0);
|
||||
}
|
||||
if m == 0 {
|
||||
return (n as u32, 0);
|
||||
}
|
||||
|
||||
if n == m && source == target {
|
||||
return (0, m);
|
||||
}
|
||||
|
||||
let inf = n + m;
|
||||
let mut matrix = N2Array::new(n + 2, m + 2, 0);
|
||||
|
||||
matrix[(0, 0)] = inf;
|
||||
for i in 0..n + 1 {
|
||||
matrix[(i + 1, 0)] = inf;
|
||||
matrix[(i + 1, 1)] = i;
|
||||
}
|
||||
for j in 0..m + 1 {
|
||||
matrix[(0, j + 1)] = inf;
|
||||
matrix[(1, j + 1)] = j;
|
||||
}
|
||||
|
||||
let mut last_row = BTreeMap::new();
|
||||
|
||||
for (row, char_s) in source.iter().enumerate() {
|
||||
let mut last_match_col = 0;
|
||||
let row = row + 1;
|
||||
|
||||
for (col, char_t) in target.iter().enumerate() {
|
||||
let col = col + 1;
|
||||
let last_match_row = *last_row.get(&char_t).unwrap_or(&0);
|
||||
let cost = if char_s == char_t { 0 } else { 1 };
|
||||
|
||||
let dist_add = matrix[(row, col + 1)] + 1;
|
||||
let dist_del = matrix[(row + 1, col)] + 1;
|
||||
let dist_sub = matrix[(row, col)] + cost;
|
||||
let dist_trans = matrix[(last_match_row, last_match_col)]
|
||||
+ (row - last_match_row - 1)
|
||||
+ 1
|
||||
+ (col - last_match_col - 1);
|
||||
|
||||
let dist = min(min(dist_add, dist_del), min(dist_sub, dist_trans));
|
||||
|
||||
matrix[(row + 1, col + 1)] = dist;
|
||||
|
||||
if cost == 0 {
|
||||
last_match_col = col;
|
||||
}
|
||||
}
|
||||
|
||||
last_row.insert(char_s, row);
|
||||
}
|
||||
|
||||
let mut minimum = (u32::max_value(), 0);
|
||||
|
||||
for x in n..=m {
|
||||
let dist = matrix[(n + 1, x + 1)] as u32;
|
||||
if dist < minimum.0 {
|
||||
minimum = (dist, x)
|
||||
}
|
||||
}
|
||||
|
||||
minimum
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn matched_length() {
|
||||
let query = "Levenste";
|
||||
let text = "Levenshtein";
|
||||
|
||||
let (dist, length) = prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
|
||||
assert_eq!(dist, 1);
|
||||
assert_eq!(&text[..length], "Levenshte");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn matched_length_panic() {
|
||||
let query = "Levenshtein";
|
||||
let text = "Levenste";
|
||||
|
||||
// this function will panic if the source is longer than the target
|
||||
prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
|
||||
}
|
||||
}
|
||||
@@ -1,202 +0,0 @@
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate assert_matches;
|
||||
#[macro_use]
|
||||
extern crate pest_derive;
|
||||
|
||||
mod automaton;
|
||||
mod bucket_sort;
|
||||
mod database;
|
||||
mod distinct_map;
|
||||
mod error;
|
||||
mod filters;
|
||||
mod levenshtein;
|
||||
mod number;
|
||||
mod query_builder;
|
||||
mod query_tree;
|
||||
mod query_words_mapper;
|
||||
mod ranked_map;
|
||||
mod raw_document;
|
||||
mod reordered_attrs;
|
||||
pub mod criterion;
|
||||
pub mod facets;
|
||||
pub mod raw_indexer;
|
||||
pub mod serde;
|
||||
pub mod settings;
|
||||
pub mod store;
|
||||
pub mod update;
|
||||
|
||||
pub use self::database::{BoxUpdateFn, Database, DatabaseOptions, MainT, UpdateT, MainWriter, MainReader, UpdateWriter, UpdateReader};
|
||||
pub use self::error::{Error, HeedError, FstError, MResult, pest_error, FacetError};
|
||||
pub use self::filters::Filter;
|
||||
pub use self::number::{Number, ParseNumberError};
|
||||
pub use self::ranked_map::RankedMap;
|
||||
pub use self::raw_document::RawDocument;
|
||||
pub use self::store::Index;
|
||||
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
|
||||
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
|
||||
pub use meilisearch_schema::Schema;
|
||||
pub use query_words_mapper::QueryWordsMapper;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use log::{error, trace};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::bucket_sort::PostingsListView;
|
||||
use crate::levenshtein::prefix_damerau_levenshtein;
|
||||
use crate::query_tree::{QueryId, QueryKind};
|
||||
use crate::reordered_attrs::ReorderedAttrs;
|
||||
|
||||
type FstSetCow<'a> = fst::Set<Cow<'a, [u8]>>;
|
||||
type FstMapCow<'a> = fst::Map<Cow<'a, [u8]>>;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Document {
|
||||
pub id: DocumentId,
|
||||
pub highlights: Vec<Highlight>,
|
||||
|
||||
#[cfg(test)]
|
||||
pub matches: Vec<crate::bucket_sort::SimpleMatch>,
|
||||
}
|
||||
|
||||
fn highlights_from_raw_document<'a, 'tag, 'txn>(
|
||||
raw_document: &RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Vec<Highlight>
|
||||
{
|
||||
let mut highlights = Vec::new();
|
||||
|
||||
for bm in raw_document.bare_matches.iter() {
|
||||
let postings_list = &arena[bm.postings_list];
|
||||
let input = postings_list.input();
|
||||
let kind = &queries_kinds.get(&bm.query_index);
|
||||
|
||||
for di in postings_list.iter() {
|
||||
let covered_area = match kind {
|
||||
Some(QueryKind::NonTolerant(query)) | Some(QueryKind::Tolerant(query)) => {
|
||||
let len = if query.len() > input.len() {
|
||||
input.len()
|
||||
} else {
|
||||
prefix_damerau_levenshtein(query.as_bytes(), input).1
|
||||
};
|
||||
u16::try_from(len).unwrap_or(u16::max_value())
|
||||
},
|
||||
_ => di.char_length,
|
||||
};
|
||||
|
||||
let attribute = searchable_attrs
|
||||
.and_then(|sa| sa.reverse(di.attribute))
|
||||
.unwrap_or(di.attribute);
|
||||
|
||||
let attribute = match schema.indexed_pos_to_field_id(attribute) {
|
||||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
let highlight = Highlight {
|
||||
attribute,
|
||||
char_index: di.char_index,
|
||||
char_length: covered_area,
|
||||
};
|
||||
|
||||
highlights.push(highlight);
|
||||
}
|
||||
}
|
||||
|
||||
highlights
|
||||
}
|
||||
|
||||
impl Document {
|
||||
#[cfg(not(test))]
|
||||
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||
Document { id, highlights: highlights.to_owned() }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||
Document { id, highlights: highlights.to_owned(), matches: Vec::new() }
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub fn from_raw<'a, 'tag, 'txn>(
|
||||
raw_document: RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Document
|
||||
{
|
||||
let highlights = highlights_from_raw_document(
|
||||
&raw_document,
|
||||
queries_kinds,
|
||||
arena,
|
||||
searchable_attrs,
|
||||
schema,
|
||||
);
|
||||
|
||||
Document { id: raw_document.id, highlights }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn from_raw<'a, 'tag, 'txn>(
|
||||
raw_document: RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Document
|
||||
{
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
|
||||
let highlights = highlights_from_raw_document(
|
||||
&raw_document,
|
||||
queries_kinds,
|
||||
arena,
|
||||
searchable_attrs,
|
||||
schema,
|
||||
);
|
||||
|
||||
let mut matches = Vec::new();
|
||||
for sm in raw_document.processed_matches {
|
||||
let attribute = searchable_attrs
|
||||
.and_then(|sa| sa.reverse(sm.attribute))
|
||||
.unwrap_or(sm.attribute);
|
||||
|
||||
let attribute = match schema.indexed_pos_to_field_id(attribute) {
|
||||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
matches.push(SimpleMatch { attribute, ..sm });
|
||||
}
|
||||
matches.sort_unstable();
|
||||
|
||||
Document { id: raw_document.id, highlights, matches }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::mem;
|
||||
|
||||
#[test]
|
||||
fn docindex_mem_size() {
|
||||
assert_eq!(mem::size_of::<DocIndex>(), 12);
|
||||
}
|
||||
}
|
||||
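The covered area computed above is the number of matched characters, clamped into the `u16` stored on `Highlight`. A minimal standalone sketch of just that clamping step; the function name and the plain `min` are illustrative, since the real code measures tolerant matches with `prefix_damerau_levenshtein`:

use std::convert::TryFrom;

// Illustrative only: clamp a matched length into a u16 highlight length,
// saturating at u16::MAX instead of failing on overflow.
fn covered_area(query_len: usize, input_len: usize) -> u16 {
    let len = query_len.min(input_len);
    u16::try_from(len).unwrap_or(u16::MAX)
}

fn main() {
    assert_eq!(covered_area(3, 10), 3);
    assert_eq!(covered_area(100_000, 200_000), u16::MAX);
}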
@@ -1,120 +0,0 @@
use std::cmp::Ordering;
use std::fmt;
use std::num::{ParseFloatError, ParseIntError};
use std::str::FromStr;

use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub enum Number {
    Unsigned(u64),
    Signed(i64),
    Float(OrderedFloat<f64>),
    Null,
}

impl Default for Number {
    fn default() -> Self {
        Self::Null
    }
}

impl FromStr for Number {
    type Err = ParseNumberError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let uint_error = match u64::from_str(s) {
            Ok(unsigned) => return Ok(Number::Unsigned(unsigned)),
            Err(error) => error,
        };

        let int_error = match i64::from_str(s) {
            Ok(signed) => return Ok(Number::Signed(signed)),
            Err(error) => error,
        };

        let float_error = match f64::from_str(s) {
            Ok(float) => return Ok(Number::Float(OrderedFloat(float))),
            Err(error) => error,
        };

        Err(ParseNumberError {
            uint_error,
            int_error,
            float_error,
        })
    }
}

impl PartialEq for Number {
    fn eq(&self, other: &Number) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for Number {}

impl PartialOrd for Number {
    fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Number {
    fn cmp(&self, other: &Self) -> Ordering {
        use Number::{Float, Signed, Unsigned, Null};

        match (*self, *other) {
            (Unsigned(a), Unsigned(b)) => a.cmp(&b),
            (Unsigned(a), Signed(b)) => {
                if b < 0 {
                    Ordering::Greater
                } else {
                    a.cmp(&(b as u64))
                }
            }
            (Unsigned(a), Float(b)) => (OrderedFloat(a as f64)).cmp(&b),
            (Signed(a), Unsigned(b)) => {
                if a < 0 {
                    Ordering::Less
                } else {
                    (a as u64).cmp(&b)
                }
            }
            (Signed(a), Signed(b)) => a.cmp(&b),
            (Signed(a), Float(b)) => OrderedFloat(a as f64).cmp(&b),
            (Float(a), Unsigned(b)) => a.cmp(&OrderedFloat(b as f64)),
            (Float(a), Signed(b)) => a.cmp(&OrderedFloat(b as f64)),
            (Float(a), Float(b)) => a.cmp(&b),
            (Null, Null) => Ordering::Equal,
            (_, Null) => Ordering::Less,
            (Null, _) => Ordering::Greater,
        }
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError {
    uint_error: ParseIntError,
    int_error: ParseIntError,
    float_error: ParseFloatError,
}

impl fmt::Display for ParseNumberError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.uint_error == self.int_error {
            write!(
                f,
                "can not parse number: {}, {}",
                self.uint_error, self.float_error
            )
        } else {
            write!(
                f,
                "can not parse number: {}, {}, {}",
                self.uint_error, self.int_error, self.float_error
            )
        }
    }
}
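The `FromStr` implementation above tries the narrowest parse first: `u64`, then `i64`, then `f64`. A minimal standalone sketch of the same fallback order using only the standard library; the `Parsed` enum is illustrative and omits the crate's `Null` variant and `OrderedFloat` wrapper:

use std::str::FromStr;

// Illustrative enum; the real crate wraps floats in OrderedFloat and keeps a Null variant.
#[derive(Debug, PartialEq)]
enum Parsed { Unsigned(u64), Signed(i64), Float(f64) }

fn parse_number(s: &str) -> Option<Parsed> {
    if let Ok(u) = u64::from_str(s) { return Some(Parsed::Unsigned(u)); }
    if let Ok(i) = i64::from_str(s) { return Some(Parsed::Signed(i)); }
    f64::from_str(s).ok().map(Parsed::Float)
}

fn main() {
    assert_eq!(parse_number("42"), Some(Parsed::Unsigned(42)));
    assert_eq!(parse_number("-7"), Some(Parsed::Signed(-7)));
    assert_eq!(parse_number("3.5"), Some(Parsed::Float(3.5)));
    assert_eq!(parse_number("abc"), None);
}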
File diff suppressed because it is too large
@@ -1,560 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::ops::Range;
use std::time::Instant;
use std::{cmp, fmt, iter::once};

use fst::{IntoStreamer, Streamer};
use itertools::{EitherOrBoth, merge_join_by};
use meilisearch_tokenizer::split_query_string;
use sdset::{Set, SetBuf, SetOperation};
use log::debug;

use crate::database::MainT;
use crate::{store, DocumentId, DocIndex, MResult, FstSetCow};
use crate::automaton::{normalize_str, build_dfa, build_prefix_dfa, build_exact_dfa};
use crate::QueryWordsMapper;

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Operation {
    And(Vec<Operation>),
    Or(Vec<Operation>),
    Query(Query),
}

impl fmt::Debug for Operation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fn pprint_tree(f: &mut fmt::Formatter<'_>, op: &Operation, depth: usize) -> fmt::Result {
            match op {
                Operation::And(children) => {
                    writeln!(f, "{:1$}AND", "", depth * 2)?;
                    children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
                },
                Operation::Or(children) => {
                    writeln!(f, "{:1$}OR", "", depth * 2)?;
                    children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
                },
                Operation::Query(query) => writeln!(f, "{:2$}{:?}", "", query, depth * 2),
            }
        }

        pprint_tree(f, self, 0)
    }
}

impl Operation {
    fn tolerant(id: QueryId, prefix: bool, s: &str) -> Operation {
        Operation::Query(Query { id, prefix, exact: true, kind: QueryKind::Tolerant(s.to_string()) })
    }

    fn non_tolerant(id: QueryId, prefix: bool, s: &str) -> Operation {
        Operation::Query(Query { id, prefix, exact: true, kind: QueryKind::NonTolerant(s.to_string()) })
    }

    fn phrase2(id: QueryId, prefix: bool, (left, right): (&str, &str)) -> Operation {
        let kind = QueryKind::Phrase(vec![left.to_owned(), right.to_owned()]);
        Operation::Query(Query { id, prefix, exact: true, kind })
    }
}

pub type QueryId = usize;

#[derive(Clone, Eq)]
pub struct Query {
    pub id: QueryId,
    pub prefix: bool,
    pub exact: bool,
    pub kind: QueryKind,
}

impl PartialEq for Query {
    fn eq(&self, other: &Self) -> bool {
        self.prefix == other.prefix && self.kind == other.kind
    }
}

impl Hash for Query {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.prefix.hash(state);
        self.kind.hash(state);
    }
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum QueryKind {
    Tolerant(String),
    NonTolerant(String),
    Phrase(Vec<String>),
}

impl fmt::Debug for Query {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Query { id, prefix, kind, .. } = self;
        let prefix = if *prefix { String::from("Prefix") } else { String::default() };
        match kind {
            QueryKind::NonTolerant(word) => {
                f.debug_struct(&(prefix + "NonTolerant")).field("id", &id).field("word", &word).finish()
            },
            QueryKind::Tolerant(word) => {
                f.debug_struct(&(prefix + "Tolerant")).field("id", &id).field("word", &word).finish()
            },
            QueryKind::Phrase(words) => {
                f.debug_struct(&(prefix + "Phrase")).field("id", &id).field("words", &words).finish()
            },
        }
    }
}

#[derive(Debug, Default)]
pub struct PostingsList {
    docids: SetBuf<DocumentId>,
    matches: SetBuf<DocIndex>,
}

pub struct Context<'a> {
    pub words_set: FstSetCow<'a>,
    pub stop_words: FstSetCow<'a>,
    pub synonyms: store::Synonyms,
    pub postings_lists: store::PostingsLists,
    pub prefix_postings_lists: store::PrefixPostingsListsCache,
}

fn split_best_frequency<'a>(reader: &heed::RoTxn<MainT>, ctx: &Context, word: &'a str) -> MResult<Option<(&'a str, &'a str)>> {
    let chars = word.char_indices().skip(1);
    let mut best = None;

    for (i, _) in chars {
        let (left, right) = word.split_at(i);

        let left_freq = ctx.postings_lists
            .postings_list(reader, left.as_bytes())?
            .map(|p| p.docids.len())
            .unwrap_or(0);
        let right_freq = ctx.postings_lists
            .postings_list(reader, right.as_bytes())?
            .map(|p| p.docids.len())
            .unwrap_or(0);

        let min_freq = cmp::min(left_freq, right_freq);
        if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
            best = Some((min_freq, left, right));
        }
    }

    Ok(best.map(|(_, l, r)| (l, r)))
}

fn fetch_synonyms(reader: &heed::RoTxn<MainT>, ctx: &Context, words: &[&str]) -> MResult<Vec<Vec<String>>> {
    let words = normalize_str(&words.join(" "));
    let set = ctx.synonyms.synonyms_fst(reader, words.as_bytes())?;

    let mut strings = Vec::new();
    let mut stream = set.stream();
    while let Some(input) = stream.next() {
        if let Ok(input) = std::str::from_utf8(input) {
            let alts = input.split_ascii_whitespace().map(ToOwned::to_owned).collect();
            strings.push(alts);
        }
    }

    Ok(strings)
}

fn create_operation<I, F>(iter: I, f: F) -> Operation
where I: IntoIterator<Item=Operation>,
      F: Fn(Vec<Operation>) -> Operation,
{
    let mut iter = iter.into_iter();
    match (iter.next(), iter.next()) {
        (Some(first), None) => first,
        (first, second) => f(first.into_iter().chain(second).chain(iter).collect()),
    }
}
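`split_best_frequency` above tries every character boundary of a word and keeps the split whose rarer half is the most frequent, which is what lets a concatenation such as "newyork" be rewritten as a two-word phrase. A standalone sketch of the same selection rule over an in-memory frequency table; the `HashMap` and the sample frequencies stand in for the LMDB postings lists read in the real code:

use std::collections::HashMap;

// Illustrative: pick the split point maximizing the frequency of the rarer half.
fn split_best_frequency<'a>(freqs: &HashMap<&str, usize>, word: &'a str) -> Option<(&'a str, &'a str)> {
    let mut best: Option<(usize, &str, &str)> = None;
    for (i, _) in word.char_indices().skip(1) {
        let (left, right) = word.split_at(i);
        let left_freq = freqs.get(left).copied().unwrap_or(0);
        let right_freq = freqs.get(right).copied().unwrap_or(0);
        let min_freq = left_freq.min(right_freq);
        if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
            best = Some((min_freq, left, right));
        }
    }
    best.map(|(_, l, r)| (l, r))
}

fn main() {
    let freqs = HashMap::from([("new", 10), ("york", 8), ("ne", 1), ("wyork", 1)]);
    assert_eq!(split_best_frequency(&freqs, "newyork"), Some(("new", "york")));
}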
const MAX_NGRAM: usize = 3;
|
||||
|
||||
pub fn create_query_tree(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
query: &str,
|
||||
) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
|
||||
{
|
||||
let words = split_query_string(query).map(str::to_lowercase);
|
||||
let words = words.filter(|w| !ctx.stop_words.contains(w));
|
||||
let words: Vec<_> = words.enumerate().collect();
|
||||
|
||||
let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));
|
||||
|
||||
fn create_inner(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
mapper: &mut QueryWordsMapper,
|
||||
words: &[(usize, String)],
|
||||
) -> MResult<Vec<Operation>>
|
||||
{
|
||||
let mut alts = Vec::new();
|
||||
|
||||
for ngram in 1..=MAX_NGRAM {
|
||||
if let Some(group) = words.get(..ngram) {
|
||||
let mut group_ops = Vec::new();
|
||||
|
||||
let tail = &words[ngram..];
|
||||
let is_last = tail.is_empty();
|
||||
|
||||
let mut group_alts = Vec::new();
|
||||
match group {
|
||||
[(id, word)] => {
|
||||
let mut idgen = ((id + 1) * 100)..;
|
||||
let range = (*id)..id+1;
|
||||
|
||||
let phrase = split_best_frequency(reader, ctx, word)?
|
||||
.map(|ws| {
|
||||
let id = idgen.next().unwrap();
|
||||
idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &[ws.0, ws.1]);
|
||||
Operation::phrase2(id, is_last, ws)
|
||||
});
|
||||
|
||||
let synonyms = fetch_synonyms(reader, ctx, &[word])?
|
||||
.into_iter()
|
||||
.map(|alts| {
|
||||
let exact = alts.len() == 1;
|
||||
let id = idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &alts);
|
||||
|
||||
let mut idgen = once(id).chain(&mut idgen);
|
||||
let iter = alts.into_iter().map(|w| {
|
||||
let id = idgen.next().unwrap();
|
||||
let kind = QueryKind::NonTolerant(w);
|
||||
Operation::Query(Query { id, prefix: false, exact, kind })
|
||||
});
|
||||
|
||||
create_operation(iter, Operation::And)
|
||||
});
|
||||
|
||||
let original = Operation::tolerant(*id, is_last, word);
|
||||
|
||||
group_alts.push(original);
|
||||
group_alts.extend(synonyms.chain(phrase));
|
||||
},
|
||||
words => {
|
||||
let id = words[0].0;
|
||||
let mut idgen = ((id + 1) * 100_usize.pow(ngram as u32))..;
|
||||
let range = id..id+ngram;
|
||||
|
||||
let words: Vec<_> = words.iter().map(|(_, s)| s.as_str()).collect();
|
||||
|
||||
for synonym in fetch_synonyms(reader, ctx, &words)? {
|
||||
let exact = synonym.len() == 1;
|
||||
let id = idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &synonym);
|
||||
|
||||
let mut idgen = once(id).chain(&mut idgen);
|
||||
let synonym = synonym.into_iter().map(|s| {
|
||||
let id = idgen.next().unwrap();
|
||||
let kind = QueryKind::NonTolerant(s);
|
||||
Operation::Query(Query { id, prefix: false, exact, kind })
|
||||
});
|
||||
group_alts.push(create_operation(synonym, Operation::And));
|
||||
}
|
||||
|
||||
let id = idgen.next().unwrap();
|
||||
let concat = words.concat();
|
||||
mapper.declare(range.clone(), id, &[&concat]);
|
||||
group_alts.push(Operation::non_tolerant(id, is_last, &concat));
|
||||
}
|
||||
}
|
||||
|
||||
group_ops.push(create_operation(group_alts, Operation::Or));
|
||||
|
||||
if !tail.is_empty() {
|
||||
let tail_ops = create_inner(reader, ctx, mapper, tail)?;
|
||||
group_ops.push(create_operation(tail_ops, Operation::Or));
|
||||
}
|
||||
|
||||
alts.push(create_operation(group_ops, Operation::And));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(alts)
|
||||
}
|
||||
|
||||
let alternatives = create_inner(reader, ctx, &mut mapper, &words)?;
|
||||
let operation = Operation::Or(alternatives);
|
||||
let mapping = mapper.mapping();
|
||||
|
||||
Ok((operation, mapping))
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct PostingsKey<'o> {
|
||||
pub query: &'o Query,
|
||||
pub input: Vec<u8>,
|
||||
pub distance: u8,
|
||||
pub is_exact: bool,
|
||||
}
|
||||
|
||||
pub type Postings<'o, 'txn> = HashMap<PostingsKey<'o>, Cow<'txn, Set<DocIndex>>>;
|
||||
pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>;
|
||||
|
||||
pub struct QueryResult<'o, 'txn> {
|
||||
pub docids: Cow<'txn, Set<DocumentId>>,
|
||||
pub queries: Postings<'o, 'txn>,
|
||||
}
|
||||
|
||||
pub fn traverse_query_tree<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
tree: &'o Operation,
|
||||
) -> MResult<QueryResult<'o, 'txn>>
|
||||
{
|
||||
fn execute_and<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
cache: &mut Cache<'o, 'txn>,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
operations: &'o [Operation],
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
debug!("{:1$}AND", "", depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for op in operations {
|
||||
if cache.get(op).is_none() {
|
||||
let docids = match op {
|
||||
Operation::And(ops) => execute_and(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, postings, depth + 1, &query)?,
|
||||
};
|
||||
cache.insert(op, docids);
|
||||
}
|
||||
}
|
||||
|
||||
for op in operations {
|
||||
if let Some(docids) = cache.get(op) {
|
||||
results.push(docids.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
let op = sdset::multi::Intersection::new(results);
|
||||
let docids = op.into_set_buf();
|
||||
|
||||
debug!("{:3$}--- AND fetched {} documents in {:.02?}", "", docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Ok(Cow::Owned(docids))
|
||||
}
|
||||
|
||||
fn execute_or<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
cache: &mut Cache<'o, 'txn>,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
operations: &'o [Operation],
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
debug!("{:1$}OR", "", depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for op in operations {
|
||||
if cache.get(op).is_none() {
|
||||
let docids = match op {
|
||||
Operation::And(ops) => execute_and(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, postings, depth + 1, &query)?,
|
||||
};
|
||||
cache.insert(op, docids);
|
||||
}
|
||||
}
|
||||
|
||||
for op in operations {
|
||||
if let Some(docids) = cache.get(op) {
|
||||
results.push(docids.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
let op = sdset::multi::Union::new(results);
|
||||
let docids = op.into_set_buf();
|
||||
|
||||
debug!("{:3$}--- OR fetched {} documents in {:.02?}", "", docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Ok(Cow::Owned(docids))
|
||||
}
|
||||
|
||||
fn execute_query<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
query: &'o Query,
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
let before = Instant::now();
|
||||
|
||||
let Query { prefix, kind, exact, .. } = query;
|
||||
let docids: Cow<Set<_>> = match kind {
|
||||
QueryKind::Tolerant(word) => {
|
||||
if *prefix && word.len() <= 2 {
|
||||
let prefix = {
|
||||
let mut array = [0; 4];
|
||||
let bytes = word.as_bytes();
|
||||
array[..bytes.len()].copy_from_slice(bytes);
|
||||
array
|
||||
};
|
||||
|
||||
// We retrieve the cached postings lists for all
|
||||
// the words that starts with this short prefix.
|
||||
let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default();
|
||||
let key = PostingsKey { query, input: word.clone().into_bytes(), distance: 0, is_exact: false };
|
||||
postings.insert(key, result.matches);
|
||||
let prefix_docids = &result.docids;
|
||||
|
||||
// We retrieve the exact postings list for the prefix,
|
||||
// because we must consider these matches as exact.
|
||||
let result = ctx.postings_lists.postings_list(reader, word.as_bytes())?.unwrap_or_default();
|
||||
let key = PostingsKey { query, input: word.clone().into_bytes(), distance: 0, is_exact: true };
|
||||
postings.insert(key, result.matches);
|
||||
let exact_docids = &result.docids;
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = sdset::duo::Union::new(prefix_docids, exact_docids).into_set_buf();
|
||||
debug!("{:4$}prefix docids ({} and {}) construction took {:.02?}",
|
||||
"", prefix_docids.len(), exact_docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
|
||||
} else {
|
||||
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
|
||||
|
||||
let byte = word.as_bytes()[0];
|
||||
let mut stream = if byte == u8::max_value() {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).into_stream()
|
||||
} else {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
||||
};
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
while let Some(input) = stream.next() {
|
||||
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
|
||||
let distance = dfa.eval(input).to_u8();
|
||||
let is_exact = *exact && distance == 0 && input.len() == word.len();
|
||||
results.push(result.docids);
|
||||
let key = PostingsKey { query, input: input.to_owned(), distance, is_exact };
|
||||
postings.insert(key, result.matches);
|
||||
}
|
||||
}
|
||||
debug!("{:3$}docids retrieval ({:?}) took {:.02?}", "", results.len(), before.elapsed(), depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = if results.len() > 10 {
|
||||
let cap = results.iter().map(|dis| dis.len()).sum();
|
||||
let mut docids = Vec::with_capacity(cap);
|
||||
for dis in results {
|
||||
docids.extend_from_slice(&dis);
|
||||
}
|
||||
SetBuf::from_dirty(docids)
|
||||
} else {
|
||||
let sets = results.iter().map(AsRef::as_ref).collect();
|
||||
sdset::multi::Union::new(sets).into_set_buf()
|
||||
};
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
}
|
||||
},
|
||||
QueryKind::NonTolerant(word) => {
|
||||
// TODO support prefix and non-prefix exact DFA
|
||||
let dfa = build_exact_dfa(word);
|
||||
|
||||
let byte = word.as_bytes()[0];
|
||||
let mut stream = if byte == u8::max_value() {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).into_stream()
|
||||
} else {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
||||
};
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
while let Some(input) = stream.next() {
|
||||
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
|
||||
let distance = dfa.eval(input).to_u8();
|
||||
results.push(result.docids);
|
||||
let key = PostingsKey { query, input: input.to_owned(), distance, is_exact: *exact };
|
||||
postings.insert(key, result.matches);
|
||||
}
|
||||
}
|
||||
debug!("{:3$}docids retrieval ({:?}) took {:.02?}", "", results.len(), before.elapsed(), depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = if results.len() > 10 {
|
||||
let cap = results.iter().map(|dis| dis.len()).sum();
|
||||
let mut docids = Vec::with_capacity(cap);
|
||||
for dis in results {
|
||||
docids.extend_from_slice(&dis);
|
||||
}
|
||||
SetBuf::from_dirty(docids)
|
||||
} else {
|
||||
let sets = results.iter().map(AsRef::as_ref).collect();
|
||||
sdset::multi::Union::new(sets).into_set_buf()
|
||||
};
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
},
|
||||
QueryKind::Phrase(words) => {
|
||||
// TODO support prefix and non-prefix exact DFA
|
||||
if let [first, second] = words.as_slice() {
|
||||
let first = ctx.postings_lists.postings_list(reader, first.as_bytes())?.unwrap_or_default();
|
||||
let second = ctx.postings_lists.postings_list(reader, second.as_bytes())?.unwrap_or_default();
|
||||
|
||||
let iter = merge_join_by(first.matches.as_slice(), second.matches.as_slice(), |a, b| {
|
||||
let x = (a.document_id, a.attribute, (a.word_index as u32) + 1);
|
||||
let y = (b.document_id, b.attribute, b.word_index as u32);
|
||||
x.cmp(&y)
|
||||
});
|
||||
|
||||
let matches: Vec<_> = iter
|
||||
.filter_map(EitherOrBoth::both)
|
||||
.flat_map(|(a, b)| once(*a).chain(Some(*b)))
|
||||
.collect();
|
||||
|
||||
let before = Instant::now();
|
||||
let mut docids: Vec<_> = matches.iter().map(|m| m.document_id).collect();
|
||||
docids.dedup();
|
||||
let docids = SetBuf::new(docids).unwrap();
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
let matches = Cow::Owned(SetBuf::from_dirty(matches));
|
||||
let key = PostingsKey { query, input: vec![], distance: 0, is_exact: true };
|
||||
postings.insert(key, matches);
|
||||
|
||||
Cow::Owned(docids)
|
||||
} else {
|
||||
debug!("{:2$}{:?} skipped", "", words, depth * 2);
|
||||
Cow::default()
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
debug!("{:4$}{:?} fetched {:?} documents in {:.02?}", "", query, docids.len(), before.elapsed(), depth * 2);
|
||||
Ok(docids)
|
||||
}
|
||||
|
||||
let mut cache = Cache::new();
|
||||
let mut postings = Postings::new();
|
||||
|
||||
let docids = match tree {
|
||||
Operation::And(ops) => execute_and(reader, ctx, &mut cache, &mut postings, 0, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, &mut cache, &mut postings, 0, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, &mut postings, 0, &query)?,
|
||||
};
|
||||
|
||||
Ok(QueryResult { docids, queries: postings })
|
||||
}
|
||||
@@ -1,416 +0,0 @@
use std::collections::HashMap;
use std::iter::FromIterator;
use std::ops::Range;
use intervaltree::{Element, IntervalTree};

pub type QueryId = usize;

pub struct QueryWordsMapper {
    originals: Vec<String>,
    mappings: HashMap<QueryId, (Range<usize>, Vec<String>)>,
}

impl QueryWordsMapper {
    pub fn new<I, A>(originals: I) -> QueryWordsMapper
    where I: IntoIterator<Item = A>,
          A: ToString,
    {
        let originals = originals.into_iter().map(|s| s.to_string()).collect();
        QueryWordsMapper { originals, mappings: HashMap::new() }
    }

    #[allow(clippy::len_zero)]
    pub fn declare<I, A>(&mut self, range: Range<usize>, id: QueryId, replacement: I)
    where I: IntoIterator<Item = A>,
          A: ToString,
    {
        assert!(range.len() != 0);
        assert!(self.originals.get(range.clone()).is_some());
        assert!(id >= self.originals.len());

        let replacement: Vec<_> = replacement.into_iter().map(|s| s.to_string()).collect();

        assert!(!replacement.is_empty());

        // We detect words at the end and at the front of the
        // replacement that are common with the originals:
        //
        //    x a b c d e f g
        //       ^^^/   \^^^
        //  a b x c d k j e f
        //  ^^^           ^^^
        //

        let left = &self.originals[..range.start];
        let right = &self.originals[range.end..];

        let common_left = longest_common_prefix(left, &replacement);
        let common_right = longest_common_prefix(&replacement, right);

        for i in 0..common_left {
            let range = range.start - common_left + i..range.start - common_left + i + 1;
            let replacement = vec![replacement[i].clone()];
            self.mappings.insert(id + i, (range, replacement));
        }

        {
            let replacement = replacement[common_left..replacement.len() - common_right].to_vec();
            self.mappings.insert(id + common_left, (range.clone(), replacement));
        }

        for i in 0..common_right {
            let id = id + replacement.len() - common_right + i;
            let range = range.end + i..range.end + i + 1;
            let replacement = vec![replacement[replacement.len() - common_right + i].clone()];
            self.mappings.insert(id, (range, replacement));
        }
    }

    pub fn mapping(self) -> HashMap<QueryId, Range<usize>> {
        let mappings = self.mappings.into_iter().map(|(i, (r, v))| (r, (i, v)));
        let intervals = IntervalTree::from_iter(mappings);

        let mut output = HashMap::new();
        let mut offset = 0;

        // We map each original word to the biggest number of
        // associated words.
        for i in 0..self.originals.len() {
            let max = intervals.query_point(i)
                .filter_map(|e| {
                    if e.range.end - 1 == i {
                        let len = e.value.1.iter().skip(i - e.range.start).count();
                        if len != 0 { Some(len) } else { None }
                    } else { None }
                })
                .max()
                .unwrap_or(1);

            let range = i + offset..i + offset + max;
            output.insert(i, range);
            offset += max - 1;
        }

        // We retrieve the range that each original word
        // is mapped to and apply it to each of the words.
        for i in 0..self.originals.len() {

            let iter = intervals.query_point(i).filter(|e| e.range.end - 1 == i);
            for Element { range, value: (id, words) } in iter {

                // We ask for the complete range mapped to the area we map.
                let start = output.get(&range.start).map(|r| r.start).unwrap_or(range.start);
                let end = output.get(&(range.end - 1)).map(|r| r.end).unwrap_or(range.end);
                let range = start..end;

                // We map each query id to one word until the last one,
                // which we map to the remaining words.
                let add = range.len() - words.len();
                for (j, x) in range.take(words.len()).enumerate() {
                    let add = if j == words.len() - 1 { add } else { 0 }; // is last?
                    let range = x..x + 1 + add;
                    output.insert(id + j, range);
                }
            }
        }

        output
    }
}

fn longest_common_prefix<T: Eq + std::fmt::Debug>(a: &[T], b: &[T]) -> usize {
    let mut best = None;
    for i in (0..a.len()).rev() {
        let count = a[i..].iter().zip(b).take_while(|(a, b)| a == b).count();
        best = match best {
            Some(old) if count > old => Some(count),
            Some(_) => break,
            None => Some(count),
        };
    }
    best.unwrap_or(0)
}
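The `longest_common_prefix` helper above measures how many words line up at the junction between the original query words and a declared replacement, so that shared words keep a one-to-one mapping. A simplified standalone illustration of that boundary-overlap idea; it is not a drop-in replacement for the helper above, just the core intuition:

// Illustrative: length of the longest suffix of `a` that is also a prefix of `b`.
fn longest_common_overlap<T: PartialEq>(a: &[T], b: &[T]) -> usize {
    (0..=a.len().min(b.len()))
        .rev()
        .find(|&n| a[a.len() - n..] == b[..n])
        .unwrap_or(0)
}

fn main() {
    let originals: &[&str] = &["great", "awesome", "new"];
    let replacement: &[&str] = &["new", "york", "city"];
    // "new" is shared across the boundary, so exactly one word overlaps.
    assert_eq!(longest_common_overlap(originals, replacement), 1);
}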
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn original_unmodified() {
|
||||
let query = ["new", "york", "city", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// new york = new york city
|
||||
builder.declare(0..2, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// new = new york city
|
||||
builder.declare(0..1, 7, &["new", "york", "city"]);
|
||||
// ^ 7 8 9
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..2); // york
|
||||
assert_eq!(mapping[&2], 2..3); // city
|
||||
assert_eq!(mapping[&3], 3..4); // subway
|
||||
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
|
||||
assert_eq!(mapping[&7], 0..1); // new
|
||||
assert_eq!(mapping[&8], 1..2); // york
|
||||
assert_eq!(mapping[&9], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn original_unmodified2() {
|
||||
let query = ["new", "york", "city", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// city subway = new york city underground train
|
||||
builder.declare(2..4, 4, &["new", "york", "city", "underground", "train"]);
|
||||
// ^ 4 5 6 7 8
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..2); // york
|
||||
assert_eq!(mapping[&2], 2..3); // city
|
||||
assert_eq!(mapping[&3], 3..5); // subway
|
||||
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
assert_eq!(mapping[&7], 3..4); // underground
|
||||
assert_eq!(mapping[&8], 4..5); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn original_unmodified3() {
|
||||
let query = ["a", "b", "x", "x", "a", "b", "c", "d", "e", "f", "g"];
|
||||
// 0 1 2 3 4 5 6 7 8 9 10
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// c d = a b x c d k j e f
|
||||
builder.declare(6..8, 11, &["a", "b", "x", "c", "d", "k", "j", "e", "f"]);
|
||||
// ^^ 11 12 13 14 15 16 17 18 19
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // a
|
||||
assert_eq!(mapping[&1], 1..2); // b
|
||||
assert_eq!(mapping[&2], 2..3); // x
|
||||
assert_eq!(mapping[&3], 3..4); // x
|
||||
assert_eq!(mapping[&4], 4..5); // a
|
||||
assert_eq!(mapping[&5], 5..6); // b
|
||||
assert_eq!(mapping[&6], 6..7); // c
|
||||
assert_eq!(mapping[&7], 7..11); // d
|
||||
assert_eq!(mapping[&8], 11..12); // e
|
||||
assert_eq!(mapping[&9], 12..13); // f
|
||||
assert_eq!(mapping[&10], 13..14); // g
|
||||
|
||||
assert_eq!(mapping[&11], 4..5); // a
|
||||
assert_eq!(mapping[&12], 5..6); // b
|
||||
assert_eq!(mapping[&13], 6..7); // x
|
||||
assert_eq!(mapping[&14], 7..8); // c
|
||||
assert_eq!(mapping[&15], 8..9); // d
|
||||
assert_eq!(mapping[&16], 9..10); // k
|
||||
assert_eq!(mapping[&17], 10..11); // j
|
||||
assert_eq!(mapping[&18], 11..12); // e
|
||||
assert_eq!(mapping[&19], 12..13); // f
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_growing() {
|
||||
let query = ["new", "york", "subway"];
|
||||
// 0 1 2
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// new york = new york city
|
||||
builder.declare(0..2, 3, &["new", "york", "city"]);
|
||||
// ^ 3 4 5
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..3); // york
|
||||
assert_eq!(mapping[&2], 3..4); // subway
|
||||
assert_eq!(mapping[&3], 0..1); // new
|
||||
assert_eq!(mapping[&4], 1..2); // york
|
||||
assert_eq!(mapping[&5], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn same_place_growings() {
|
||||
let query = ["NY", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NY = new york
|
||||
builder.declare(0..1, 2, &["new", "york"]);
|
||||
// ^ 2 3
|
||||
|
||||
// NY = new york city
|
||||
builder.declare(0..1, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// NY = NYC
|
||||
builder.declare(0..1, 7, &["NYC"]);
|
||||
// ^ 7
|
||||
|
||||
// NY = new york city
|
||||
builder.declare(0..1, 8, &["new", "york", "city"]);
|
||||
// ^ 8 9 10
|
||||
|
||||
// subway = underground train
|
||||
builder.declare(1..2, 11, &["underground", "train"]);
|
||||
// ^ 11 12
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..3); // NY
|
||||
assert_eq!(mapping[&1], 3..5); // subway
|
||||
assert_eq!(mapping[&2], 0..1); // new
|
||||
assert_eq!(mapping[&3], 1..3); // york
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
assert_eq!(mapping[&7], 0..3); // NYC
|
||||
assert_eq!(mapping[&8], 0..1); // new
|
||||
assert_eq!(mapping[&9], 1..2); // york
|
||||
assert_eq!(mapping[&10], 2..3); // city
|
||||
assert_eq!(mapping[&11], 3..4); // underground
|
||||
assert_eq!(mapping[&12], 4..5); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bigger_growing() {
|
||||
let query = ["NYC", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(0..1, 2, &["new", "york", "city"]);
|
||||
// ^ 2 3 4
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..3); // NYC
|
||||
assert_eq!(mapping[&1], 3..4); // subway
|
||||
assert_eq!(mapping[&2], 0..1); // new
|
||||
assert_eq!(mapping[&3], 1..2); // york
|
||||
assert_eq!(mapping[&4], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn middle_query_growing() {
|
||||
let query = ["great", "awesome", "NYC", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // great
|
||||
assert_eq!(mapping[&1], 1..2); // awesome
|
||||
assert_eq!(mapping[&2], 2..5); // NYC
|
||||
assert_eq!(mapping[&3], 5..6); // subway
|
||||
assert_eq!(mapping[&4], 2..3); // new
|
||||
assert_eq!(mapping[&5], 3..4); // york
|
||||
assert_eq!(mapping[&6], 4..5); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn end_query_growing() {
|
||||
let query = ["NYC", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(1..2, 2, &["underground", "train"]);
|
||||
// ^ 2 3
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // NYC
|
||||
assert_eq!(mapping[&1], 1..3); // subway
|
||||
assert_eq!(mapping[&2], 1..2); // underground
|
||||
assert_eq!(mapping[&3], 2..3); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_growings() {
|
||||
let query = ["great", "awesome", "NYC", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// subway = underground train
|
||||
builder.declare(3..4, 7, &["underground", "train"]);
|
||||
// ^ 7 8
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // great
|
||||
assert_eq!(mapping[&1], 1..2); // awesome
|
||||
assert_eq!(mapping[&2], 2..5); // NYC
|
||||
assert_eq!(mapping[&3], 5..7); // subway
|
||||
assert_eq!(mapping[&4], 2..3); // new
|
||||
assert_eq!(mapping[&5], 3..4); // york
|
||||
assert_eq!(mapping[&6], 4..5); // city
|
||||
assert_eq!(mapping[&7], 5..6); // underground
|
||||
assert_eq!(mapping[&8], 6..7); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_probable_growings() {
|
||||
let query = ["great", "awesome", "NYC", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// subway = underground train
|
||||
builder.declare(3..4, 7, &["underground", "train"]);
|
||||
// ^ 7 8
|
||||
|
||||
// great awesome = good
|
||||
builder.declare(0..2, 9, &["good"]);
|
||||
// ^ 9
|
||||
|
||||
// awesome NYC = NY
|
||||
builder.declare(1..3, 10, &["NY"]);
|
||||
// ^^ 10
|
||||
|
||||
// NYC subway = metro
|
||||
builder.declare(2..4, 11, &["metro"]);
|
||||
// ^^ 11
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // great
|
||||
assert_eq!(mapping[&1], 1..2); // awesome
|
||||
assert_eq!(mapping[&2], 2..5); // NYC
|
||||
assert_eq!(mapping[&3], 5..7); // subway
|
||||
assert_eq!(mapping[&4], 2..3); // new
|
||||
assert_eq!(mapping[&5], 3..4); // york
|
||||
assert_eq!(mapping[&6], 4..5); // city
|
||||
assert_eq!(mapping[&7], 5..6); // underground
|
||||
assert_eq!(mapping[&8], 6..7); // train
|
||||
assert_eq!(mapping[&9], 0..2); // good
|
||||
assert_eq!(mapping[&10], 1..5); // NY
|
||||
assert_eq!(mapping[&11], 2..7); // metro
|
||||
}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
use std::io::{Read, Write};

use hashbrown::HashMap;
use meilisearch_schema::FieldId;
use serde::{Deserialize, Serialize};

use crate::{DocumentId, Number};

#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RankedMap(HashMap<(DocumentId, FieldId), Number>);

impl RankedMap {
    pub fn len(&self) -> usize {
        self.0.len()
    }

    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    pub fn insert(&mut self, document: DocumentId, field: FieldId, number: Number) {
        self.0.insert((document, field), number);
    }

    pub fn remove(&mut self, document: DocumentId, field: FieldId) {
        self.0.remove(&(document, field));
    }

    pub fn get(&self, document: DocumentId, field: FieldId) -> Option<Number> {
        self.0.get(&(document, field)).cloned()
    }

    pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
        bincode::deserialize_from(reader).map(RankedMap)
    }

    pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
        bincode::serialize_into(writer, &self.0)
    }
}
@@ -1,51 +0,0 @@
use compact_arena::SmallArena;
use sdset::SetBuf;
use crate::DocIndex;
use crate::bucket_sort::{SimpleMatch, BareMatch, PostingsListView};
use crate::reordered_attrs::ReorderedAttrs;

pub struct RawDocument<'a, 'tag> {
    pub id: crate::DocumentId,
    pub bare_matches: &'a mut [BareMatch<'tag>],
    pub processed_matches: Vec<SimpleMatch>,
    /// The list of minimum `distance` found
    pub processed_distances: Vec<Option<u8>>,
    /// Does this document contain a field
    /// with one word that is exactly matching
    pub contains_one_word_field: bool,
}

impl<'a, 'tag> RawDocument<'a, 'tag> {
    pub fn new<'txn>(
        bare_matches: &'a mut [BareMatch<'tag>],
        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
        searchable_attrs: Option<&ReorderedAttrs>,
    ) -> RawDocument<'a, 'tag>
    {
        if let Some(reordered_attrs) = searchable_attrs {
            for bm in bare_matches.iter() {
                let postings_list = &postings_lists[bm.postings_list];

                let mut rewritten = Vec::new();
                for di in postings_list.iter() {
                    if let Some(attribute) = reordered_attrs.get(di.attribute) {
                        rewritten.push(DocIndex { attribute, ..*di });
                    }
                }

                let new_postings = SetBuf::from_dirty(rewritten);
                postings_lists[bm.postings_list].rewrite_with(new_postings);
            }
        }

        bare_matches.sort_unstable_by_key(|m| m.query_index);

        RawDocument {
            id: bare_matches[0].document_id,
            bare_matches,
            processed_matches: Vec::new(),
            processed_distances: Vec::new(),
            contains_one_word_field: false,
        }
    }
}
@@ -1,312 +0,0 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;

use deunicode::deunicode_with_tofu;
use meilisearch_schema::IndexedPos;
use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf;

use crate::{DocIndex, DocumentId};
use crate::FstSetCow;

const WORD_LENGTH_LIMIT: usize = 80;

type Word = Vec<u8>; // TODO make it be a SmallVec

pub struct RawIndexer<A> {
    word_limit: usize, // the maximum number of indexed words
    stop_words: fst::Set<A>,
    words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
    docs_words: HashMap<DocumentId, Vec<Word>>,
}

pub struct Indexed<'a> {
    pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
    pub docs_words: HashMap<DocumentId, FstSetCow<'a>>,
}

impl<A> RawIndexer<A> {
    pub fn new(stop_words: fst::Set<A>) -> RawIndexer<A> {
        RawIndexer::with_word_limit(stop_words, 1000)
    }

    pub fn with_word_limit(stop_words: fst::Set<A>, limit: usize) -> RawIndexer<A> {
        RawIndexer {
            word_limit: limit,
            stop_words,
            words_doc_indexes: BTreeMap::new(),
            docs_words: HashMap::new(),
        }
    }
}

impl<A: AsRef<[u8]>> RawIndexer<A> {
    pub fn index_text(&mut self, id: DocumentId, indexed_pos: IndexedPos, text: &str) -> usize {
        let mut number_of_words = 0;

        for token in Tokenizer::new(text) {
            let must_continue = index_token(
                token,
                id,
                indexed_pos,
                self.word_limit,
                &self.stop_words,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            number_of_words += 1;

            if !must_continue {
                break;
            }
        }

        number_of_words
    }

    pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, iter: I)
    where
        I: IntoIterator<Item = &'s str>,
    {
        let iter = iter.into_iter();
        for token in SeqTokenizer::new(iter) {
            let must_continue = index_token(
                token,
                id,
                indexed_pos,
                self.word_limit,
                &self.stop_words,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            if !must_continue {
                break;
            }
        }
    }

    pub fn build(self) -> Indexed<'static> {
        let words_doc_indexes = self
            .words_doc_indexes
            .into_iter()
            .map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
            .collect();

        let docs_words = self
            .docs_words
            .into_iter()
            .map(|(id, mut words)| {
                words.sort_unstable();
                words.dedup();
                let fst = fst::Set::from_iter(words).unwrap().map_data(Cow::Owned).unwrap();
                (id, fst)
            })
            .collect();

        Indexed {
            words_doc_indexes,
            docs_words,
        }
    }
}

fn index_token<A>(
    token: Token,
    id: DocumentId,
    indexed_pos: IndexedPos,
    word_limit: usize,
    stop_words: &fst::Set<A>,
    words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
    docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
where A: AsRef<[u8]>,
{
    if token.index >= word_limit {
        return false;
    }

    let lower = token.word.to_lowercase();
    let token = Token {
        word: &lower,
        ..token
    };

    if !stop_words.contains(&token.word) {
        match token_to_docindex(id, indexed_pos, token) {
            Some(docindex) => {
                let word = Vec::from(token.word);

                if word.len() <= WORD_LENGTH_LIMIT {
                    words_doc_indexes
                        .entry(word.clone())
                        .or_insert_with(Vec::new)
                        .push(docindex);
                    docs_words.entry(id).or_insert_with(Vec::new).push(word);

                    if !lower.contains(is_cjk) {
                        let unidecoded = deunicode_with_tofu(&lower, "");
                        if unidecoded != lower && !unidecoded.is_empty() {
                            let word = Vec::from(unidecoded);
                            if word.len() <= WORD_LENGTH_LIMIT {
                                words_doc_indexes
                                    .entry(word.clone())
                                    .or_insert_with(Vec::new)
                                    .push(docindex);
                                docs_words.entry(id).or_insert_with(Vec::new).push(word);
                            }
                        }
                    }
                }
            }
            None => return false,
        }
    }

    true
}

fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> Option<DocIndex> {
    let word_index = u16::try_from(token.word_index).ok()?;
    let char_index = u16::try_from(token.char_index).ok()?;
    let char_length = u16::try_from(token.word.chars().count()).ok()?;

    let docindex = DocIndex {
        document_id: id,
        attribute: indexed_pos.0,
        word_index,
        char_index,
        char_length,
    };

    Some(docindex)
}
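`index_token` above lowercases each token, skips stop words, drops words longer than `WORD_LENGTH_LIMIT`, and additionally indexes a de-unicoded variant for non-CJK text. A hedged std-only sketch of the lowercase/stop-word/length part of that pipeline; the stop-word set and the limit constant are illustrative, and the deunicode step is deliberately omitted:

use std::collections::HashSet;

const WORD_LENGTH_LIMIT: usize = 80;

// Standalone sketch: lowercase, reject stop words, reject overly long words.
fn normalize_token(word: &str, stop_words: &HashSet<&str>) -> Option<String> {
    let lower = word.to_lowercase();
    if stop_words.contains(lower.as_str()) {
        return None;
    }
    if lower.len() > WORD_LENGTH_LIMIT {
        return None;
    }
    Some(lower)
}

fn main() {
    let stop_words: HashSet<&str> = ["de", "l", "j"].into_iter().collect();
    assert_eq!(normalize_token("Aspirateur", &stop_words), Some("aspirateur".to_string()));
    assert_eq!(normalize_token("de", &stop_words), None);
}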
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe_in_sequence() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
|
||||
indexer.index_text_seq(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_stop_words() {
|
||||
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
|
||||
let stop_words = fst::Set::from_iter(stop_words).unwrap();
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes.get(&b"l"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"j"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_empty_unidecode() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "🇯🇵";
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes
|
||||
.get(&"🇯🇵".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
// test sample from 807
|
||||
fn very_long_text() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let docid = DocumentId(0);
|
||||
let text = " The locations block is the most powerful, and potentially most involved, section of the .platform.app.yaml file. It allows you to control how the application container responds to incoming requests at a very fine-grained level. Common patterns also vary between language containers due to the way PHP-FPM handles incoming requests.\nEach entry of the locations block is an absolute URI path (with leading /) and its value includes the configuration directives for how the web server should handle matching requests. That is, if your domain is example.com then '/' means “requests for example.com/”, while '/admin' means “requests for example.com/admin”. If multiple blocks could match an incoming request then the most-specific will apply.\nweb:locations:'/':# Rules for all requests that don't otherwise match....'/sites/default/files':# Rules for any requests that begin with /sites/default/files....The simplest possible locations configuration is one that simply passes all requests on to your application unconditionally:\nweb:locations:'/':passthru:trueThat is, all requests to /* should be forwarded to the process started by web.commands.start above. Note that for PHP containers the passthru key must specify what PHP file the request should be forwarded to, and must also specify a docroot under which the file lives. For example:\nweb:locations:'/':root:'web'passthru:'/app.php'This block will serve requests to / from the web directory in the application, and if a file doesn’t exist on disk then the request will be forwarded to the /app.php script.\nA full list of the possible subkeys for locations is below.\n root: The folder from which to serve static assets for this location relative to the application root. The application root is the directory in which the .platform.app.yaml file is located. Typical values for this property include public or web. Setting it to '' is not recommended, and its behavior may vary depending on the type of application. Absolute paths are not supported.\n passthru: Whether to forward disallowed and missing resources from this location to the application and can be true, false or an absolute URI path (with leading /). The default value is false. For non-PHP applications it will generally be just true or false. In a PHP application this will typically be the front controller such as /index.php or /app.php. This entry works similar to mod_rewrite under Apache. Note: If the value of passthru does not begin with the same value as the location key it is under, the passthru may evaluate to another entry. That may be useful when you want different cache settings for different paths, for instance, but want missing files in all of them to map back to the same front controller. See the example block below.\n index: The files to consider when serving a request for a directory: an array of file names or null. (typically ['index.html']). Note that in order for this to work, access to the static files named must be allowed by the allow or rules keys for this location.\n expires: How long to allow static assets from this location to be cached (this enables the Cache-Control and Expires headers) and can be a time or -1 for no caching (default). Times can be suffixed with “ms” (milliseconds), “s” (seconds), “m” (minutes), “h” (hours), “d” (days), “w” (weeks), “M” (months, 30d) or “y” (years, 365d).\n scripts: Whether to allow loading scripts in that location (true or false). 
This directive is only meaningful on PHP.\n allow: Whether to allow serving files which don’t match a rule (true or false, default: true).\n headers: Any additional headers to apply to static assets. This section is a mapping of header names to header values. Responses from the application aren’t affected, to avoid overlap with the application’s own ability to include custom headers in the response.\n rules: Specific overrides for a specific location. The key is a PCRE (regular expression) that is matched against the full request path.\n request_buffering: Most application servers do not support chunked requests (e.g. fpm, uwsgi), so Platform.sh enables request_buffering by default to handle them. That default configuration would look like this if it was present in .platform.app.yaml:\nweb:locations:'/':passthru:truerequest_buffering:enabled:truemax_request_size:250mIf the application server can already efficiently handle chunked requests, the request_buffering subkey can be modified to disable it entirely (enabled: false). Additionally, applications that frequently deal with uploads greater than 250MB in size can update the max_request_size key to the application’s needs. Note that modifications to request_buffering will need to be specified at each location where it is desired.\n ";
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
assert!(words_doc_indexes.get(&"buffering".to_owned().into_bytes()).is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn words_over_index_1000_not_indexed() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let docid = DocumentId(0);
|
||||
let mut text = String::with_capacity(5000);
|
||||
for _ in 0..1000 {
|
||||
text.push_str("less ");
|
||||
}
|
||||
text.push_str("more");
|
||||
indexer.index_text(docid, indexed_pos, &text);
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
assert!(words_doc_indexes.get(&"less".to_owned().into_bytes()).is_some());
|
||||
assert!(words_doc_indexes.get(&"more".to_owned().into_bytes()).is_none());
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
use std::cmp;

#[derive(Default, Clone)]
pub struct ReorderedAttrs {
    reorders: Vec<Option<u16>>,
    reverse: Vec<u16>,
}

impl ReorderedAttrs {
    pub fn new() -> ReorderedAttrs {
        ReorderedAttrs { reorders: Vec::new(), reverse: Vec::new() }
    }

    pub fn insert_attribute(&mut self, attribute: u16) {
        let new_len = cmp::max(attribute as usize + 1, self.reorders.len());
        self.reorders.resize(new_len, None);
        self.reorders[attribute as usize] = Some(self.reverse.len() as u16);
        self.reverse.push(attribute);
    }

    pub fn get(&self, attribute: u16) -> Option<u16> {
        match self.reorders.get(attribute as usize)? {
            Some(attribute) => Some(*attribute),
            None => None,
        }
    }

    pub fn reverse(&self, attribute: u16) -> Option<u16> {
        self.reverse.get(attribute as usize).copied()
    }
}
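`ReorderedAttrs` keeps two maps: the insertion order defines a forward map from schema attribute to search rank, and `reverse` maps a rank back to the original attribute. A minimal standalone sketch of that idea; the struct and field names are illustrative, not the crate's types:

// Standalone sketch mirroring the ReorderedAttrs idea: insertion order is the
// search rank, and `reverse` maps a rank back to the original attribute number.
struct Reordered {
    forward: Vec<Option<u16>>, // attribute -> rank
    reverse: Vec<u16>,         // rank -> attribute
}

impl Reordered {
    fn new() -> Self {
        Reordered { forward: Vec::new(), reverse: Vec::new() }
    }

    fn insert(&mut self, attribute: u16) {
        let len = self.forward.len().max(attribute as usize + 1);
        self.forward.resize(len, None);
        self.forward[attribute as usize] = Some(self.reverse.len() as u16);
        self.reverse.push(attribute);
    }
}

fn main() {
    let mut r = Reordered::new();
    r.insert(7); // attribute 7 is searched first (rank 0)
    r.insert(2); // attribute 2 is searched second (rank 1)
    assert_eq!(r.forward[7], Some(0));
    assert_eq!(r.forward[2], Some(1));
    assert_eq!(r.reverse, vec![7, 2]);
}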
@@ -1,161 +0,0 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};

use meilisearch_schema::{Schema, FieldId};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;
use serde_json::Error as SerdeJsonError;

use crate::database::MainT;
use crate::store::DocumentsFields;
use crate::DocumentId;

#[derive(Debug)]
pub enum DeserializerError {
    SerdeJson(SerdeJsonError),
    Zlmdb(heed::Error),
    Custom(String),
}

impl de::Error for DeserializerError {
    fn custom<T: fmt::Display>(msg: T) -> Self {
        DeserializerError::Custom(msg.to_string())
    }
}

impl fmt::Display for DeserializerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DeserializerError::SerdeJson(e) => write!(f, "serde json related error: {}", e),
            DeserializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
            DeserializerError::Custom(s) => f.write_str(s),
        }
    }
}

impl Error for DeserializerError {}

impl From<SerdeJsonError> for DeserializerError {
    fn from(error: SerdeJsonError) -> DeserializerError {
        DeserializerError::SerdeJson(error)
    }
}

impl From<heed::Error> for DeserializerError {
    fn from(error: heed::Error) -> DeserializerError {
        DeserializerError::Zlmdb(error)
    }
}

pub struct Deserializer<'a> {
    pub document_id: DocumentId,
    pub reader: &'a heed::RoTxn<MainT>,
    pub documents_fields: DocumentsFields,
    pub schema: &'a Schema,
    pub fields: Option<&'a HashSet<FieldId>>,
}

impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
    type Error = DeserializerError;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        self.deserialize_option(visitor)
    }

    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        self.deserialize_map(visitor)
    }

    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        let mut error = None;

        let iter = self
            .documents_fields
            .document_fields(self.reader, self.document_id)?
            .filter_map(|result| {
                let (attr, value) = match result {
                    Ok(value) => value,
                    Err(e) => {
                        error = Some(e);
                        return None;
                    }
                };

                let is_displayed = self.schema.is_displayed(attr);
                if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
                    if let Some(attribute_name) = self.schema.name(attr) {
                        let cursor = Cursor::new(value.to_owned());
                        let ioread = SerdeJsonIoRead::new(cursor);
                        let value = Value(SerdeJsonDeserializer::new(ioread));

                        Some((attribute_name, value))
                    } else {
                        None
                    }
                } else {
                    None
                }
            });

        let mut iter = iter.peekable();

        let result = match iter.peek() {
            Some(_) => {
                let map_deserializer = de::value::MapDeserializer::new(iter);
                visitor
                    .visit_some(map_deserializer)
                    .map_err(DeserializerError::from)
            }
            None => visitor.visit_none(),
        };

        match error.take() {
            Some(error) => Err(error.into()),
            None => result,
        }
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf unit unit_struct newtype_struct seq tuple
        tuple_struct struct enum identifier ignored_any
    }
}

struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);

impl<'de> de::IntoDeserializer<'de, SerdeJsonError> for Value {
    type Deserializer = Self;

    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

impl<'de> de::Deserializer<'de> for Value {
    type Error = SerdeJsonError;

    fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        self.0.deserialize_any(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||
bytes byte_buf option unit unit_struct newtype_struct seq tuple
|
||||
tuple_struct map struct enum identifier ignored_any
|
||||
}
|
||||
}
|
||||
@@ -1,92 +0,0 @@
mod deserializer;

pub use self::deserializer::{Deserializer, DeserializerError};

use std::{error::Error, fmt};

use serde::ser;
use serde_json::Error as SerdeJsonError;
use meilisearch_schema::Error as SchemaError;

use crate::ParseNumberError;

#[derive(Debug)]
pub enum SerializerError {
    DocumentIdNotFound,
    InvalidDocumentIdFormat,
    Zlmdb(heed::Error),
    SerdeJson(SerdeJsonError),
    ParseNumber(ParseNumberError),
    Schema(SchemaError),
    UnserializableType { type_name: &'static str },
    UnindexableType { type_name: &'static str },
    UnrankableType { type_name: &'static str },
    Custom(String),
}

impl ser::Error for SerializerError {
    fn custom<T: fmt::Display>(msg: T) -> Self {
        SerializerError::Custom(msg.to_string())
    }
}

impl fmt::Display for SerializerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SerializerError::DocumentIdNotFound => {
                f.write_str("Primary key is missing.")
            }
            SerializerError::InvalidDocumentIdFormat => {
                f.write_str("a document primary key can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")
            }
            SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
            SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
            SerializerError::ParseNumber(e) => {
                write!(f, "error while trying to parse a number: {}", e)
            }
            SerializerError::Schema(e) => write!(f, "impossible to update schema: {}", e),
            SerializerError::UnserializableType { type_name } => {
                write!(f, "{} is not a serializable type", type_name)
            }
            SerializerError::UnindexableType { type_name } => {
                write!(f, "{} is not an indexable type", type_name)
            }
            SerializerError::UnrankableType { type_name } => {
                write!(f, "{} types can not be used for ranking", type_name)
            }
            SerializerError::Custom(s) => f.write_str(s),
        }
    }
}

impl Error for SerializerError {}

impl From<String> for SerializerError {
    fn from(value: String) -> SerializerError {
        SerializerError::Custom(value)
    }
}

impl From<SerdeJsonError> for SerializerError {
    fn from(error: SerdeJsonError) -> SerializerError {
        SerializerError::SerdeJson(error)
    }
}

impl From<heed::Error> for SerializerError {
    fn from(error: heed::Error) -> SerializerError {
        SerializerError::Zlmdb(error)
    }
}

impl From<ParseNumberError> for SerializerError {
    fn from(error: ParseNumberError) -> SerializerError {
        SerializerError::ParseNumber(error)
    }
}

impl From<SchemaError> for SerializerError {
    fn from(error: SchemaError) -> SerializerError {
        SerializerError::Schema(error)
    }
}
@@ -1,183 +0,0 @@
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::iter::IntoIterator;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use self::RankingRule::*;
|
||||
|
||||
pub const DEFAULT_RANKING_RULES: [RankingRule; 6] = [Typo, Words, Proximity, Attribute, WordsPosition, Exactness];
|
||||
|
||||
static RANKING_RULE_REGEX: Lazy<regex::Regex> = Lazy::new(|| {
|
||||
regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap()
|
||||
});
|
||||
|
||||
#[derive(Default, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct Settings {
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub ranking_rules: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub distinct_attribute: Option<Option<String>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub displayed_attributes: Option<Option<HashSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attributes_for_faceting: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
// Any value that is present is considered Some value, including null.
|
||||
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
||||
where T: Deserialize<'de>,
|
||||
D: Deserializer<'de>
|
||||
{
|
||||
Deserialize::deserialize(deserializer).map(Some)
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
pub fn to_update(&self) -> Result<SettingsUpdate, RankingRuleConversionError> {
|
||||
let settings = self.clone();
|
||||
|
||||
let ranking_rules = match settings.ranking_rules {
|
||||
Some(Some(rules)) => UpdateState::Update(RankingRule::try_from_iter(rules.iter())?),
|
||||
Some(None) => UpdateState::Clear,
|
||||
None => UpdateState::Nothing,
|
||||
};
|
||||
|
||||
Ok(SettingsUpdate {
|
||||
ranking_rules,
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
primary_key: UpdateState::Nothing,
|
||||
searchable_attributes: settings.searchable_attributes.into(),
|
||||
displayed_attributes: settings.displayed_attributes.into(),
|
||||
stop_words: settings.stop_words.into(),
|
||||
synonyms: settings.synonyms.into(),
|
||||
attributes_for_faceting: settings.attributes_for_faceting.into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateState<T> {
|
||||
Update(T),
|
||||
Clear,
|
||||
Nothing,
|
||||
}
|
||||
|
||||
impl <T> From<Option<Option<T>>> for UpdateState<T> {
|
||||
fn from(opt: Option<Option<T>>) -> UpdateState<T> {
|
||||
match opt {
|
||||
Some(Some(t)) => UpdateState::Update(t),
|
||||
Some(None) => UpdateState::Clear,
|
||||
None => UpdateState::Nothing,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RankingRuleConversionError;
|
||||
|
||||
impl std::fmt::Display for RankingRuleConversionError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "impossible to convert into RankingRule")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum RankingRule {
|
||||
Typo,
|
||||
Words,
|
||||
Proximity,
|
||||
Attribute,
|
||||
WordsPosition,
|
||||
Exactness,
|
||||
Asc(String),
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RankingRule {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
RankingRule::Typo => f.write_str("typo"),
|
||||
RankingRule::Words => f.write_str("words"),
|
||||
RankingRule::Proximity => f.write_str("proximity"),
|
||||
RankingRule::Attribute => f.write_str("attribute"),
|
||||
RankingRule::WordsPosition => f.write_str("wordsPosition"),
|
||||
RankingRule::Exactness => f.write_str("exactness"),
|
||||
RankingRule::Asc(field) => write!(f, "asc({})", field),
|
||||
RankingRule::Desc(field) => write!(f, "desc({})", field),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for RankingRule {
|
||||
type Err = RankingRuleConversionError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let rule = match s {
|
||||
"typo" => RankingRule::Typo,
|
||||
"words" => RankingRule::Words,
|
||||
"proximity" => RankingRule::Proximity,
|
||||
"attribute" => RankingRule::Attribute,
|
||||
"wordsPosition" => RankingRule::WordsPosition,
|
||||
"exactness" => RankingRule::Exactness,
|
||||
_ => {
|
||||
let captures = RANKING_RULE_REGEX.captures(s).ok_or(RankingRuleConversionError)?;
|
||||
match (captures.get(1).map(|m| m.as_str()), captures.get(2)) {
|
||||
(Some("asc"), Some(field)) => RankingRule::Asc(field.as_str().to_string()),
|
||||
(Some("desc"), Some(field)) => RankingRule::Desc(field.as_str().to_string()),
|
||||
_ => return Err(RankingRuleConversionError)
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(rule)
|
||||
}
|
||||
}
|
||||
|
||||
impl RankingRule {
|
||||
pub fn field(&self) -> Option<&str> {
|
||||
match self {
|
||||
RankingRule::Asc(field) | RankingRule::Desc(field) => Some(field),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_iter(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Vec<RankingRule>, RankingRuleConversionError> {
|
||||
rules.into_iter()
|
||||
.map(|s| RankingRule::from_str(s.as_ref()))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SettingsUpdate {
|
||||
pub ranking_rules: UpdateState<Vec<RankingRule>>,
|
||||
pub distinct_attribute: UpdateState<String>,
|
||||
pub primary_key: UpdateState<String>,
|
||||
pub searchable_attributes: UpdateState<Vec<String>>,
|
||||
pub displayed_attributes: UpdateState<HashSet<String>>,
|
||||
pub stop_words: UpdateState<BTreeSet<String>>,
|
||||
pub synonyms: UpdateState<BTreeMap<String, Vec<String>>>,
|
||||
pub attributes_for_faceting: UpdateState<Vec<String>>,
|
||||
}
|
||||
|
||||
impl Default for SettingsUpdate {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ranking_rules: UpdateState::Nothing,
|
||||
distinct_attribute: UpdateState::Nothing,
|
||||
primary_key: UpdateState::Nothing,
|
||||
searchable_attributes: UpdateState::Nothing,
|
||||
displayed_attributes: UpdateState::Nothing,
|
||||
stop_words: UpdateState::Nothing,
|
||||
synonyms: UpdateState::Nothing,
|
||||
attributes_for_faceting: UpdateState::Nothing,
|
||||
}
|
||||
}
|
||||
}
|
||||
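As the comment on deserialize_some above notes, every field of Settings is a double Option so that an absent key, an explicit null, and a real value can be told apart. A minimal sketch (not part of the removed file) of how those three cases map onto UpdateState through the From impl above:

// Hypothetical values, assuming UpdateState is in scope:
let missing: Option<Option<String>> = None;                        // key absent  -> Nothing
let cleared: Option<Option<String>> = Some(None);                  // key is null -> Clear
let updated: Option<Option<String>> = Some(Some("title".into()));  // real value  -> Update
assert!(matches!(UpdateState::from(missing), UpdateState::Nothing));
assert!(matches!(UpdateState::from(cleared), UpdateState::Clear));
assert!(matches!(UpdateState::from(updated), UpdateState::Update(_)));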
@@ -1,32 +0,0 @@
use std::borrow::Cow;

use heed::{types::CowSlice, BytesEncode, BytesDecode};
use sdset::{Set, SetBuf};
use zerocopy::{AsBytes, FromBytes};

pub struct CowSet<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for CowSet<T>
where
    T: AsBytes,
{
    type EItem = Set<T>;

    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
        CowSlice::bytes_encode(item.as_slice())
    }
}

impl<'a, T: 'a> BytesDecode<'a> for CowSet<T>
where
    T: FromBytes + Copy,
{
    type DItem = Cow<'a, Set<T>>;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        match CowSlice::<T>::bytes_decode(bytes)? {
            Cow::Owned(vec) => Some(Cow::Owned(SetBuf::new_unchecked(vec))),
            Cow::Borrowed(slice) => Some(Cow::Borrowed(Set::new_unchecked(slice))),
        }
    }
}
@@ -1,43 +0,0 @@
use std::borrow::Cow;

use heed::Result as ZResult;
use heed::types::{ByteSlice, OwnedType};

use crate::database::MainT;
use crate::{DocumentId, FstSetCow};
use super::BEU32;

#[derive(Copy, Clone)]
pub struct DocsWords {
    pub(crate) docs_words: heed::Database<OwnedType<BEU32>, ByteSlice>,
}

impl DocsWords {
    pub fn put_doc_words(
        self,
        writer: &mut heed::RwTxn<MainT>,
        document_id: DocumentId,
        words: &FstSetCow,
    ) -> ZResult<()> {
        let document_id = BEU32::new(document_id.0);
        let bytes = words.as_fst().as_bytes();
        self.docs_words.put(writer, &document_id, bytes)
    }

    pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> {
        let document_id = BEU32::new(document_id.0);
        self.docs_words.delete(writer, &document_id)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.docs_words.clear(writer)
    }

    pub fn doc_words(self, reader: &heed::RoTxn<MainT>, document_id: DocumentId) -> ZResult<FstSetCow> {
        let document_id = BEU32::new(document_id.0);
        match self.docs_words.get(reader, &document_id)? {
            Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
            None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
        }
    }
}
@@ -1,79 +0,0 @@
use heed::types::{ByteSlice, OwnedType};
use crate::database::MainT;
use heed::Result as ZResult;
use meilisearch_schema::FieldId;

use super::DocumentFieldStoredKey;
use crate::DocumentId;

#[derive(Copy, Clone)]
pub struct DocumentsFields {
    pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}

impl DocumentsFields {
    pub fn put_document_field(
        self,
        writer: &mut heed::RwTxn<MainT>,
        document_id: DocumentId,
        field: FieldId,
        value: &[u8],
    ) -> ZResult<()> {
        let key = DocumentFieldStoredKey::new(document_id, field);
        self.documents_fields.put(writer, &key, value)
    }

    pub fn del_all_document_fields(
        self,
        writer: &mut heed::RwTxn<MainT>,
        document_id: DocumentId,
    ) -> ZResult<usize> {
        let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
        let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
        self.documents_fields.delete_range(writer, &(start..=end))
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.documents_fields.clear(writer)
    }

    pub fn document_attribute<'txn>(
        self,
        reader: &'txn heed::RoTxn<MainT>,
        document_id: DocumentId,
        field: FieldId,
    ) -> ZResult<Option<&'txn [u8]>> {
        let key = DocumentFieldStoredKey::new(document_id, field);
        self.documents_fields.get(reader, &key)
    }

    pub fn document_fields<'txn>(
        self,
        reader: &'txn heed::RoTxn<MainT>,
        document_id: DocumentId,
    ) -> ZResult<DocumentFieldsIter<'txn>> {
        let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
        let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
        let iter = self.documents_fields.range(reader, &(start..=end))?;
        Ok(DocumentFieldsIter { iter })
    }
}

pub struct DocumentFieldsIter<'txn> {
    iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}

impl<'txn> Iterator for DocumentFieldsIter<'txn> {
    type Item = ZResult<(FieldId, &'txn [u8])>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(Ok((key, bytes))) => {
                let field_id = FieldId(key.field_id.get());
                Some(Ok((field_id, bytes)))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}
@@ -1,143 +0,0 @@
|
||||
use super::DocumentFieldIndexedKey;
|
||||
use crate::database::MainT;
|
||||
use crate::DocumentId;
|
||||
use heed::types::OwnedType;
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use crate::MResult;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFieldsCounts {
|
||||
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl DocumentsFieldsCounts {
|
||||
pub fn put_document_field_count(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: IndexedPos,
|
||||
value: u16,
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
self.documents_fields_counts.put(writer, &key, &value)
|
||||
}
|
||||
|
||||
pub fn del_all_document_fields_counts(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
self.documents_fields_counts.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.documents_fields_counts.clear(writer)
|
||||
}
|
||||
|
||||
pub fn document_field_count(
|
||||
self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: IndexedPos,
|
||||
) -> ZResult<Option<u16>> {
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
match self.documents_fields_counts.get(reader, &key)? {
|
||||
Some(count) => Ok(Some(count)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_fields_counts<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsCountsIter { iter })
|
||||
}
|
||||
|
||||
pub fn documents_ids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<DocumentsIdsIter<'txn>> {
|
||||
let iter = self.documents_fields_counts.iter(reader)?;
|
||||
Ok(DocumentsIdsIter {
|
||||
last_seen_id: None,
|
||||
iter,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn all_documents_fields_counts<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
) -> ZResult<AllDocumentsFieldsCountsIter<'txn>> {
|
||||
let iter = self.documents_fields_counts.iter(reader)?;
|
||||
Ok(AllDocumentsFieldsCountsIter { iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsCountsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsIdsIter<'txn> {
|
||||
last_seen_id: Option<DocumentId>,
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentsIdsIter<'_> {
|
||||
type Item = MResult<DocumentId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for result in &mut self.iter {
|
||||
match result {
|
||||
Ok((key, _)) => {
|
||||
let document_id = DocumentId(key.docid.get());
|
||||
if Some(document_id) != self.last_seen_id {
|
||||
self.last_seen_id = Some(document_id);
|
||||
return Some(Ok(document_id));
|
||||
}
|
||||
}
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AllDocumentsFieldsCountsIter<'txn> {
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let docid = DocumentId(key.docid.get());
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((docid, indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,75 +0,0 @@
use std::borrow::Cow;

use heed::{BytesDecode, BytesEncode};
use sdset::Set;

use crate::DocumentId;
use super::cow_set::CowSet;

pub struct DocumentsIds;

impl BytesEncode<'_> for DocumentsIds {
    type EItem = Set<DocumentId>;

    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
        CowSet::bytes_encode(item)
    }
}

impl<'a> BytesDecode<'a> for DocumentsIds {
    type DItem = Cow<'a, Set<DocumentId>>;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        CowSet::bytes_decode(bytes)
    }
}

pub struct DiscoverIds<'a> {
    ids_iter: std::slice::Iter<'a, DocumentId>,
    left_id: Option<u32>,
    right_id: Option<u32>,
    available_range: std::ops::Range<u32>,
}

impl DiscoverIds<'_> {
    pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
        let mut ids_iter = ids.iter();
        let right_id = ids_iter.next().map(|id| id.0);
        let available_range = 0..right_id.unwrap_or(u32::max_value());
        DiscoverIds { ids_iter, left_id: None, right_id, available_range }
    }
}

impl Iterator for DiscoverIds<'_> {
    type Item = DocumentId;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            match self.available_range.next() {
                // The available range gives us a new id, we return it.
                Some(id) => return Some(DocumentId(id)),
                // The available range is exhausted, we need to find the next one.
                None if self.available_range.end == u32::max_value() => return None,
                None => loop {
                    self.left_id = self.right_id.take();
                    self.right_id = self.ids_iter.next().map(|id| id.0);
                    match (self.left_id, self.right_id) {
                        // We found a gap in the used ids, we can yield all ids
                        // until the end of the gap
                        (Some(l), Some(r)) => if l.saturating_add(1) != r {
                            self.available_range = (l + 1)..r;
                            break;
                        },
                        // The last used id has been reached, we can use all ids
                        // until u32 MAX
                        (Some(l), None) => {
                            self.available_range = l.saturating_add(1)..u32::max_value();
                            break;
                        },
                        _ => (),
                    }
                },
            }
        }
    }
}
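A minimal sketch (not part of the removed file) of how DiscoverIds walks the gaps between already used internal ids, following the comments in the iterator above:

// Hypothetical usage, assuming DocumentId and DiscoverIds are in scope:
use sdset::Set;

let ids = [DocumentId(0), DocumentId(1), DocumentId(4)];
let used = Set::new_unchecked(&ids);
let mut free = DiscoverIds::new(used);
assert_eq!(free.next(), Some(DocumentId(2))); // first hole between the used ids
assert_eq!(free.next(), Some(DocumentId(3)));
assert_eq!(free.next(), Some(DocumentId(5))); // past the last used id, ids count up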
@@ -1,97 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem;

use heed::{RwTxn, RoTxn, RoRange, types::Str, BytesEncode, BytesDecode};
use sdset::{SetBuf, Set, SetOperation};

use meilisearch_types::DocumentId;
use meilisearch_schema::FieldId;

use crate::MResult;
use crate::database::MainT;
use crate::facets::FacetKey;
use super::cow_set::CowSet;

/// contains facet info
#[derive(Clone, Copy)]
pub struct Facets {
    pub(crate) facets: heed::Database<FacetKey, FacetData>,
}

pub struct FacetData;

impl<'a> BytesEncode<'a> for FacetData {
    type EItem = (&'a str, &'a Set<DocumentId>);

    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        // get size of the first item
        let first_size = item.0.as_bytes().len();
        let size = mem::size_of::<u64>()
            + first_size
            + item.1.len() * mem::size_of::<DocumentId>();
        let mut buffer = Vec::with_capacity(size);
        // encode the length of the first item
        buffer.extend_from_slice(&first_size.to_be_bytes());
        buffer.extend_from_slice(Str::bytes_encode(&item.0)?.as_ref());
        let second_slice = CowSet::bytes_encode(&item.1)?;
        buffer.extend_from_slice(second_slice.as_ref());
        Some(Cow::Owned(buffer))
    }
}

impl<'a> BytesDecode<'a> for FacetData {
    type DItem = (&'a str, Cow<'a, Set<DocumentId>>);

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        const LEN: usize = mem::size_of::<u64>();
        let mut size_buf = [0; LEN];
        size_buf.copy_from_slice(bytes.get(0..LEN)?);
        // decode size of the first item from the bytes
        let first_size = usize::from_be_bytes(size_buf);
        // decode first and second items
        let first_item = Str::bytes_decode(bytes.get(LEN..(LEN + first_size))?)?;
        let second_item = CowSet::bytes_decode(bytes.get((LEN + first_size)..)?)?;
        Some((first_item, second_item))
    }
}

impl Facets {
    // we use sdset::SetBuf to ensure the docids are sorted.
    pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>, facet_value: &str) -> MResult<()> {
        Ok(self.facets.put(writer, &facet_key, &(facet_value, doc_ids))?)
    }

    pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> MResult<RoRange<'txn, FacetKey, FacetData>> {
        Ok(self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))?)
    }

    pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> MResult<Option<(&'txn str, Cow<'txn, Set<DocumentId>>)>> {
        Ok(self.facets.get(reader, &facet_key)?)
    }

    /// updates the facets store, removing the documents from the facets provided in the
    /// `facet_map` argument
    pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> MResult<()> {
        for (key, (name, document_ids)) in facet_map {
            if let Some((_, old)) = self.facets.get(writer, &key)? {
                let to_remove = SetBuf::from_dirty(document_ids);
                let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
                self.facets.put(writer, &key, &(&name, new.as_set()))?;
            }
        }
        Ok(())
    }

    pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> MResult<()> {
        for (key, (facet_name, document_ids)) in facet_map {
            let set = SetBuf::from_dirty(document_ids);
            self.put_facet_document_ids(writer, key, set.as_set(), &facet_name)?;
        }
        Ok(())
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
        Ok(self.facets.clear(writer)?)
    }
}
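For reference, a short sketch of the byte layout FacetData writes for a (facet value, docids) pair; this is an illustration derived from the encode/decode code above, not an authoritative format description:

// [ value length as 8-byte big-endian u64 ][ UTF-8 bytes of the facet value ][ raw DocumentId array ]
// e.g. ("blue", {DocumentId(1), DocumentId(2)}) is stored roughly as
//   00 00 00 00 00 00 00 04 | "blue" | the two DocumentId values as plain bytes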
@@ -1,320 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str, CowSlice};
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use meilisearch_types::DocumentId;
|
||||
use sdset::Set;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{RankedMap, MResult};
|
||||
use crate::settings::RankingRule;
|
||||
use crate::{FstSetCow, FstMapCow};
|
||||
use super::{CowSet, DocumentsIds};
|
||||
|
||||
const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting";
|
||||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const CUSTOMS_KEY: &str = "customs";
|
||||
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
|
||||
const EXTERNAL_DOCIDS_KEY: &str = "external-docids";
|
||||
const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
const INTERNAL_DOCIDS_KEY: &str = "internal-docids";
|
||||
const NAME_KEY: &str = "name";
|
||||
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules";
|
||||
const SCHEMA_KEY: &str = "schema";
|
||||
const SORTED_DOCUMENT_IDS_CACHE_KEY: &str = "sorted-document-ids-cache";
|
||||
const STOP_WORDS_KEY: &str = "stop-words";
|
||||
const SYNONYMS_KEY: &str = "synonyms";
|
||||
const UPDATED_AT_KEY: &str = "updated-at";
|
||||
const WORDS_KEY: &str = "words";
|
||||
|
||||
pub type FreqsMap = HashMap<String, usize>;
|
||||
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
|
||||
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Main {
|
||||
pub(crate) main: heed::PolyDatabase,
|
||||
}
|
||||
|
||||
impl Main {
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.clear(writer)?)
|
||||
}
|
||||
|
||||
pub fn put_name(self, writer: &mut heed::RwTxn<MainT>, name: &str) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, Str>(writer, NAME_KEY, name)?)
|
||||
}
|
||||
|
||||
pub fn name(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, Str>(reader, NAME_KEY)?
|
||||
.map(|name| name.to_owned()))
|
||||
}
|
||||
|
||||
pub fn put_created_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeDatetime>(writer, CREATED_AT_KEY, &Utc::now())?)
|
||||
}
|
||||
|
||||
pub fn created_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeDatetime>(reader, CREATED_AT_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_updated_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeDatetime>(writer, UPDATED_AT_KEY, &Utc::now())?)
|
||||
}
|
||||
|
||||
pub fn updated_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids)?)
|
||||
}
|
||||
|
||||
pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Cow<'txn, sdset::Set<DocumentId>>> {
|
||||
match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? {
|
||||
Some(ids) => Ok(ids),
|
||||
None => Ok(Cow::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn merge_internal_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
use sdset::SetOperation;
|
||||
|
||||
// We do an union of the old and new internal ids.
|
||||
let internal_docids = self.internal_docids(writer)?;
|
||||
let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf();
|
||||
Ok(self.put_internal_docids(writer, &internal_docids)?)
|
||||
}
|
||||
|
||||
pub fn remove_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
use sdset::SetOperation;
|
||||
|
||||
// We do a difference of the old and new internal ids.
|
||||
let internal_docids = self.internal_docids(writer)?;
|
||||
let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf();
|
||||
Ok(self.put_internal_docids(writer, &internal_docids)?)
|
||||
}
|
||||
|
||||
pub fn put_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())?)
|
||||
}
|
||||
|
||||
pub fn merge_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do an union of the old and the new set of external docids.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
drop(op);
|
||||
|
||||
let external_docids = build.into_map();
|
||||
Ok(self.put_external_docids(writer, &external_docids)?)
|
||||
}
|
||||
|
||||
pub fn remove_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do an union of the old and the new set of external docids.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(ids.into_stream()).difference();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
drop(op);
|
||||
|
||||
let external_docids = build.into_map();
|
||||
self.put_external_docids(writer, &external_docids)
|
||||
}
|
||||
|
||||
pub fn external_docids(self, reader: &heed::RoTxn<MainT>) -> MResult<FstMapCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Map::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Map::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn external_to_internal_docid(self, reader: &heed::RoTxn<MainT>, external_docid: &str) -> MResult<Option<DocumentId>> {
|
||||
let external_ids = self.external_docids(reader)?;
|
||||
Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32)))
|
||||
}
|
||||
|
||||
pub fn words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_words_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, fst.as_fst().as_bytes())?)
|
||||
}
|
||||
|
||||
pub fn put_sorted_document_ids_cache(self, writer: &mut heed::RwTxn<MainT>, documents_ids: &[DocumentId]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, CowSlice<DocumentId>>(writer, SORTED_DOCUMENT_IDS_CACHE_KEY, documents_ids)?)
|
||||
}
|
||||
|
||||
pub fn sorted_document_ids_cache(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Cow<[DocumentId]>>> {
|
||||
Ok(self.main.get::<_, Str, CowSlice<DocumentId>>(reader, SORTED_DOCUMENT_IDS_CACHE_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_schema(self, writer: &mut heed::RwTxn<MainT>, schema: &Schema) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)?)
|
||||
}
|
||||
|
||||
pub fn schema(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Schema>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<Schema>>(reader, SCHEMA_KEY)?)
|
||||
}
|
||||
|
||||
pub fn delete_schema(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, SCHEMA_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_ranked_map(self, writer: &mut heed::RwTxn<MainT>, ranked_map: &RankedMap) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)?)
|
||||
}
|
||||
|
||||
pub fn ranked_map(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<RankedMap>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_synonyms_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn synonyms_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, SYNONYMS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn synonyms(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
|
||||
let synonyms = self
|
||||
.synonyms_fst(&reader)?
|
||||
.stream()
|
||||
.into_strs()?;
|
||||
Ok(synonyms)
|
||||
}
|
||||
|
||||
pub fn put_stop_words_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn stop_words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop_words(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
|
||||
let stop_word_list = self
|
||||
.stop_words_fst(reader)?
|
||||
.stream()
|
||||
.into_strs()?;
|
||||
Ok(stop_word_list)
|
||||
}
|
||||
|
||||
pub fn put_number_of_documents<F>(self, writer: &mut heed::RwTxn<MainT>, f: F) -> MResult<u64>
|
||||
where
|
||||
F: Fn(u64) -> u64,
|
||||
{
|
||||
let new = self.number_of_documents(&*writer).map(f)?;
|
||||
self.main
|
||||
.put::<_, Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
|
||||
Ok(new)
|
||||
}
|
||||
|
||||
pub fn number_of_documents(self, reader: &heed::RoTxn<MainT>) -> MResult<u64> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)? {
|
||||
Some(value) => Ok(value),
|
||||
None => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_fields_distribution(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
fields_frequency: &FreqsMap,
|
||||
) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeFreqsMap>(writer, FIELDS_DISTRIBUTION_KEY, fields_frequency)?)
|
||||
}
|
||||
|
||||
pub fn fields_distribution(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FreqsMap>> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, SerdeFreqsMap>(reader, FIELDS_DISTRIBUTION_KEY)?
|
||||
{
|
||||
Some(freqs) => Ok(Some(freqs)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<Cow<'txn, Set<FieldId>>>> {
|
||||
Ok(self.main.get::<_, Str, CowSet<FieldId>>(reader, ATTRIBUTES_FOR_FACETING_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>, attributes: &Set<FieldId>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, CowSet<FieldId>>(writer, ATTRIBUTES_FOR_FACETING_KEY, attributes)?)
|
||||
}
|
||||
|
||||
pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING_KEY)?)
|
||||
}
|
||||
|
||||
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Vec<RankingRule>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_ranking_rules(self, writer: &mut heed::RwTxn<MainT>, value: &[RankingRule]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<Vec<RankingRule>>>(writer, RANKING_RULES_KEY, &value.to_vec())?)
|
||||
}
|
||||
|
||||
pub fn delete_ranking_rules(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, RANKING_RULES_KEY)?)
|
||||
}
|
||||
|
||||
pub fn distinct_attribute(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FieldId>> {
|
||||
match self.main.get::<_, Str, OwnedType<u16>>(reader, DISTINCT_ATTRIBUTE_KEY)? {
|
||||
Some(value) => Ok(Some(FieldId(value.to_owned()))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>, value: FieldId) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, OwnedType<u16>>(writer, DISTINCT_ATTRIBUTE_KEY, &value.0)?)
|
||||
}
|
||||
|
||||
pub fn delete_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, DISTINCT_ATTRIBUTE_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_customs(self, writer: &mut heed::RwTxn<MainT>, customs: &[u8]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, CUSTOMS_KEY, customs)?)
|
||||
}
|
||||
|
||||
pub fn customs<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<&'txn [u8]>> {
|
||||
Ok(self.main.get::<_, Str, ByteSlice>(reader, CUSTOMS_KEY)?)
|
||||
}
|
||||
}
|
||||
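The external-docids entry above keeps an fst::Map from the user-facing document id to the internal DocumentId. A minimal sketch (not part of the removed file) of that mapping, mirroring what external_to_internal_docid does:

// Hypothetical, self-contained example using the fst crate directly:
let external = fst::Map::from_iter(vec![("abc", 7u64), ("def", 8u64)]).unwrap();
assert_eq!(external.get("abc").map(|id| DocumentId(id as u32)), Some(DocumentId(7)));
assert_eq!(external.get("xyz"), None); // unknown external id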
@@ -1,522 +0,0 @@
|
||||
mod cow_set;
|
||||
mod docs_words;
|
||||
mod documents_ids;
|
||||
mod documents_fields;
|
||||
mod documents_fields_counts;
|
||||
mod facets;
|
||||
mod main;
|
||||
mod postings_lists;
|
||||
mod prefix_documents_cache;
|
||||
mod prefix_postings_lists_cache;
|
||||
mod synonyms;
|
||||
mod updates;
|
||||
mod updates_results;
|
||||
|
||||
pub use self::cow_set::CowSet;
|
||||
pub use self::docs_words::DocsWords;
|
||||
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
|
||||
pub use self::documents_fields_counts::{DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter};
|
||||
pub use self::documents_ids::{DocumentsIds, DiscoverIds};
|
||||
pub use self::facets::Facets;
|
||||
pub use self::main::Main;
|
||||
pub use self::postings_lists::PostingsLists;
|
||||
pub use self::prefix_documents_cache::PrefixDocumentsCache;
|
||||
pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache;
|
||||
pub use self::synonyms::Synonyms;
|
||||
pub use self::updates::Updates;
|
||||
pub use self::updates_results::UpdatesResults;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::{mem, ptr};
|
||||
|
||||
use heed::{BytesEncode, BytesDecode};
|
||||
use meilisearch_schema::{IndexedPos, FieldId};
|
||||
use sdset::{Set, SetBuf};
|
||||
use serde::de::{self, Deserialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
use crate::criterion::Criteria;
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::serde::Deserializer;
|
||||
use crate::settings::SettingsUpdate;
|
||||
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
|
||||
|
||||
type BEU32 = zerocopy::U32<byteorder::BigEndian>;
|
||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentFieldIndexedKey {
|
||||
docid: BEU32,
|
||||
indexed_pos: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentFieldIndexedKey {
|
||||
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
||||
DocumentFieldIndexedKey {
|
||||
docid: BEU32::new(docid.0),
|
||||
indexed_pos: BEU16::new(indexed_pos.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentFieldStoredKey {
|
||||
docid: BEU32,
|
||||
field_id: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentFieldStoredKey {
|
||||
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
||||
DocumentFieldStoredKey {
|
||||
docid: BEU32::new(docid.0),
|
||||
field_id: BEU16::new(field_id.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct Postings<'a> {
|
||||
pub docids: Cow<'a, Set<DocumentId>>,
|
||||
pub matches: Cow<'a, Set<DocIndex>>,
|
||||
}
|
||||
|
||||
pub struct PostingsCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for PostingsCodec {
|
||||
type EItem = Postings<'a>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let u64_size = mem::size_of::<u64>();
|
||||
let docids_size = item.docids.len() * mem::size_of::<DocumentId>();
|
||||
let matches_size = item.matches.len() * mem::size_of::<DocIndex>();
|
||||
|
||||
let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size);
|
||||
|
||||
let docids_len = item.docids.len() as u64;
|
||||
buffer.extend_from_slice(&docids_len.to_be_bytes());
|
||||
buffer.extend_from_slice(item.docids.as_bytes());
|
||||
buffer.extend_from_slice(item.matches.as_bytes());
|
||||
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
fn aligned_to(bytes: &[u8], align: usize) -> bool {
|
||||
(bytes as *const _ as *const () as usize) % align == 0
|
||||
}
|
||||
|
||||
fn from_bytes_to_set<'a, T: 'a>(bytes: &'a [u8]) -> Option<Cow<'a, Set<T>>>
|
||||
where T: Clone + FromBytes
|
||||
{
|
||||
match zerocopy::LayoutVerified::<_, [T]>::new_slice(bytes) {
|
||||
Some(layout) => Some(Cow::Borrowed(Set::new_unchecked(layout.into_slice()))),
|
||||
None => {
|
||||
let len = bytes.len();
|
||||
let elem_size = mem::size_of::<T>();
|
||||
|
||||
// ensure that it is the alignment that is wrong
|
||||
// and the length is valid
|
||||
if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<T>()) {
|
||||
let elems = len / elem_size;
|
||||
let mut vec = Vec::<T>::with_capacity(elems);
|
||||
|
||||
unsafe {
|
||||
let dst = vec.as_mut_ptr() as *mut u8;
|
||||
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
|
||||
vec.set_len(elems);
|
||||
}
|
||||
|
||||
return Some(Cow::Owned(SetBuf::new_unchecked(vec)));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for PostingsCodec {
|
||||
type DItem = Postings<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let u64_size = mem::size_of::<u64>();
|
||||
let docid_size = mem::size_of::<DocumentId>();
|
||||
|
||||
let (len_bytes, bytes) = bytes.split_at(u64_size);
|
||||
let docids_len = len_bytes.try_into().ok().map(u64::from_be_bytes)? as usize;
|
||||
let docids_size = docids_len * docid_size;
|
||||
|
||||
let docids_bytes = &bytes[..docids_size];
|
||||
let matches_bytes = &bytes[docids_size..];
|
||||
|
||||
let docids = from_bytes_to_set(docids_bytes)?;
|
||||
let matches = from_bytes_to_set(matches_bytes)?;
|
||||
|
||||
Some(Postings { docids, matches })
|
||||
}
|
||||
}
|
||||
|
||||
fn main_name(name: &str) -> String {
|
||||
format!("store-{}", name)
|
||||
}
|
||||
|
||||
fn postings_lists_name(name: &str) -> String {
|
||||
format!("store-{}-postings-lists", name)
|
||||
}
|
||||
|
||||
fn documents_fields_name(name: &str) -> String {
|
||||
format!("store-{}-documents-fields", name)
|
||||
}
|
||||
|
||||
fn documents_fields_counts_name(name: &str) -> String {
|
||||
format!("store-{}-documents-fields-counts", name)
|
||||
}
|
||||
|
||||
fn synonyms_name(name: &str) -> String {
|
||||
format!("store-{}-synonyms", name)
|
||||
}
|
||||
|
||||
fn docs_words_name(name: &str) -> String {
|
||||
format!("store-{}-docs-words", name)
|
||||
}
|
||||
|
||||
fn prefix_documents_cache_name(name: &str) -> String {
|
||||
format!("store-{}-prefix-documents-cache", name)
|
||||
}
|
||||
|
||||
fn prefix_postings_lists_cache_name(name: &str) -> String {
|
||||
format!("store-{}-prefix-postings-lists-cache", name)
|
||||
}
|
||||
|
||||
fn updates_name(name: &str) -> String {
|
||||
format!("store-{}-updates", name)
|
||||
}
|
||||
|
||||
fn updates_results_name(name: &str) -> String {
|
||||
format!("store-{}-updates-results", name)
|
||||
}
|
||||
|
||||
fn facets_name(name: &str) -> String {
|
||||
format!("store-{}-facets", name)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index {
|
||||
pub main: Main,
|
||||
pub postings_lists: PostingsLists,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||
pub facets: Facets,
|
||||
pub synonyms: Synonyms,
|
||||
pub docs_words: DocsWords,
|
||||
pub prefix_documents_cache: PrefixDocumentsCache,
|
||||
pub prefix_postings_lists_cache: PrefixPostingsListsCache,
|
||||
|
||||
pub updates: Updates,
|
||||
pub updates_results: UpdatesResults,
|
||||
pub(crate) updates_notifier: UpdateEventsEmitter,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn document<T: de::DeserializeOwned>(
|
||||
&self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
attributes: Option<&HashSet<&str>>,
|
||||
document_id: DocumentId,
|
||||
) -> MResult<Option<T>> {
|
||||
let schema = self.main.schema(reader)?;
|
||||
let schema = schema.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let attributes = match attributes {
|
||||
Some(attributes) => Some(attributes.iter().filter_map(|name| schema.id(*name)).collect()),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader,
|
||||
documents_fields: self.documents_fields,
|
||||
schema: &schema,
|
||||
fields: attributes.as_ref(),
|
||||
};
|
||||
|
||||
Ok(Option::<T>::deserialize(&mut deserializer)?)
|
||||
}
|
||||
|
||||
pub fn document_attribute<T: de::DeserializeOwned>(
|
||||
&self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
) -> MResult<Option<T>> {
|
||||
let bytes = self
|
||||
.documents_fields
|
||||
.document_attribute(reader, document_id, attribute)?;
|
||||
match bytes {
|
||||
Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_attribute_bytes<'txn>(
|
||||
&self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
) -> MResult<Option<&'txn [u8]>> {
|
||||
let bytes = self
|
||||
.documents_fields
|
||||
.document_attribute(reader, document_id, attribute)?;
|
||||
match bytes {
|
||||
Some(bytes) => Ok(Some(bytes)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn customs_update(&self, writer: &mut heed::RwTxn<UpdateT>, customs: Vec<u8>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
Ok(update::push_customs_update(writer, self.updates, self.updates_results, customs)?)
|
||||
}
|
||||
|
||||
pub fn settings_update(&self, writer: &mut heed::RwTxn<UpdateT>, update: SettingsUpdate) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
Ok(update::push_settings_update(writer, self.updates, self.updates_results, update)?)
|
||||
}
|
||||
|
||||
pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> {
|
||||
update::DocumentsAddition::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_partial_addition<D>(&self) -> update::DocumentsAddition<D> {
|
||||
update::DocumentsAddition::new_partial(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_deletion(&self) -> update::DocumentsDeletion {
|
||||
update::DocumentsDeletion::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn clear_all(&self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
update::push_clear_all(writer, self.updates, self.updates_results)
|
||||
}
|
||||
|
||||
pub fn current_update_id(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Option<u64>> {
|
||||
match self.updates.last_update(reader)? {
|
||||
Some((id, _)) => Ok(Some(id)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_status(
|
||||
&self,
|
||||
reader: &heed::RoTxn<UpdateT>,
|
||||
update_id: u64,
|
||||
) -> MResult<Option<update::UpdateStatus>> {
|
||||
update::update_status(reader, self.updates, self.updates_results, update_id)
|
||||
}
|
||||
|
||||
pub fn all_updates_status(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Vec<update::UpdateStatus>> {
|
||||
let mut updates = Vec::new();
|
||||
let mut last_update_result_id = 0;
|
||||
|
||||
// retrieve all updates results
|
||||
if let Some((last_id, _)) = self.updates_results.last_update(reader)? {
|
||||
updates.reserve(last_id as usize);
|
||||
|
||||
for id in 0..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
last_update_result_id = id + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// retrieve all enqueued updates
|
||||
if let Some((last_id, _)) = self.updates.last_update(reader)? {
|
||||
for id in last_update_result_id..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(updates)
|
||||
}
|
||||
|
||||
pub fn query_builder(&self) -> QueryBuilder {
|
||||
QueryBuilder::new(self)
|
||||
}
|
||||
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>(
|
||||
&'i self,
|
||||
criteria: Criteria<'c>,
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
QueryBuilder::with_criteria(self, criteria)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create(
|
||||
env: &heed::Env,
|
||||
update_env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Index> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
let postings_lists_name = postings_lists_name(name);
|
||||
let documents_fields_name = documents_fields_name(name);
|
||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let prefix_documents_cache_name = prefix_documents_cache_name(name);
|
||||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = env.create_poly_database(Some(&main_name))?;
|
||||
let postings_lists = env.create_database(Some(&postings_lists_name))?;
|
||||
let documents_fields = env.create_database(Some(&documents_fields_name))?;
|
||||
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
||||
let facets = env.create_database(Some(&facets_name))?;
|
||||
let synonyms = env.create_database(Some(&synonyms_name))?;
|
||||
let docs_words = env.create_database(Some(&docs_words_name))?;
|
||||
let prefix_documents_cache = env.create_database(Some(&prefix_documents_cache_name))?;
|
||||
let prefix_postings_lists_cache = env.create_database(Some(&prefix_postings_lists_cache_name))?;
|
||||
let updates = update_env.create_database(Some(&updates_name))?;
|
||||
let updates_results = update_env.create_database(Some(&updates_results_name))?;
|
||||
|
||||
Ok(Index {
|
||||
main: Main { main },
|
||||
postings_lists: PostingsLists { postings_lists },
|
||||
documents_fields: DocumentsFields { documents_fields },
|
||||
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
updates_notifier,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn open(
|
||||
env: &heed::Env,
|
||||
update_env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Option<Index>> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
let postings_lists_name = postings_lists_name(name);
|
||||
let documents_fields_name = documents_fields_name(name);
|
||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let prefix_documents_cache_name = prefix_documents_cache_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = match env.open_poly_database(Some(&main_name))? {
|
||||
Some(main) => main,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let postings_lists = match env.open_database(Some(&postings_lists_name))? {
|
||||
Some(postings_lists) => postings_lists,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let documents_fields = match env.open_database(Some(&documents_fields_name))? {
|
||||
Some(documents_fields) => documents_fields,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let documents_fields_counts = match env.open_database(Some(&documents_fields_counts_name))? {
|
||||
Some(documents_fields_counts) => documents_fields_counts,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let synonyms = match env.open_database(Some(&synonyms_name))? {
|
||||
Some(synonyms) => synonyms,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let docs_words = match env.open_database(Some(&docs_words_name))? {
|
||||
Some(docs_words) => docs_words,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let prefix_documents_cache = match env.open_database(Some(&prefix_documents_cache_name))? {
|
||||
Some(prefix_documents_cache) => prefix_documents_cache,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let facets = match env.open_database(Some(&facets_name))? {
|
||||
Some(facets) => facets,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let prefix_postings_lists_cache = match env.open_database(Some(&prefix_postings_lists_cache_name))? {
|
||||
Some(prefix_postings_lists_cache) => prefix_postings_lists_cache,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let updates = match update_env.open_database(Some(&updates_name))? {
|
||||
Some(updates) => updates,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let updates_results = match update_env.open_database(Some(&updates_results_name))? {
|
||||
Some(updates_results) => updates_results,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(Index {
|
||||
main: Main { main },
|
||||
postings_lists: PostingsLists { postings_lists },
|
||||
documents_fields: DocumentsFields { documents_fields },
|
||||
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
updates_notifier,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn clear(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
update_writer: &mut heed::RwTxn<UpdateT>,
|
||||
index: &Index,
|
||||
) -> MResult<()> {
|
||||
// clear all the stores
|
||||
index.main.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.documents_fields.clear(writer)?;
|
||||
index.documents_fields_counts.clear(writer)?;
|
||||
index.synonyms.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
index.prefix_documents_cache.clear(writer)?;
|
||||
index.prefix_postings_lists_cache.clear(writer)?;
|
||||
index.updates.clear(update_writer)?;
|
||||
index.updates_results.clear(update_writer)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,47 +0,0 @@
use std::borrow::Cow;

use heed::Result as ZResult;
use heed::types::ByteSlice;
use sdset::{Set, SetBuf};
use slice_group_by::GroupBy;

use crate::database::MainT;
use crate::DocIndex;
use crate::store::{Postings, PostingsCodec};

#[derive(Copy, Clone)]
pub struct PostingsLists {
    pub(crate) postings_lists: heed::Database<ByteSlice, PostingsCodec>,
}

impl PostingsLists {
    pub fn put_postings_list(
        self,
        writer: &mut heed::RwTxn<MainT>,
        word: &[u8],
        matches: &Set<DocIndex>,
    ) -> ZResult<()> {
        let docids = matches.linear_group_by_key(|m| m.document_id).map(|g| g[0].document_id).collect();
        let docids = Cow::Owned(SetBuf::new_unchecked(docids));
        let matches = Cow::Borrowed(matches);
        let postings = Postings { docids, matches };

        self.postings_lists.put(writer, word, &postings)
    }

    pub fn del_postings_list(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
        self.postings_lists.delete(writer, word)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.postings_lists.clear(writer)
    }

    pub fn postings_list<'txn>(
        self,
        reader: &'txn heed::RoTxn<MainT>,
        word: &[u8],
    ) -> ZResult<Option<Postings<'txn>>> {
        self.postings_lists.get(reader, word)
    }
}
@@ -1,80 +0,0 @@
use std::borrow::Cow;

use heed::types::{OwnedType, CowSlice};
use heed::Result as ZResult;
use zerocopy::{AsBytes, FromBytes};

use super::{BEU64, BEU32};
use crate::{DocumentId, Highlight};
use crate::database::MainT;

#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct PrefixKey {
    prefix: [u8; 4],
    index: BEU64,
    docid: BEU32,
}

impl PrefixKey {
    pub fn new(prefix: [u8; 4], index: u64, docid: u32) -> PrefixKey {
        PrefixKey {
            prefix,
            index: BEU64::new(index),
            docid: BEU32::new(docid),
        }
    }
}

#[derive(Copy, Clone)]
pub struct PrefixDocumentsCache {
    pub(crate) prefix_documents_cache: heed::Database<OwnedType<PrefixKey>, CowSlice<Highlight>>,
}

impl PrefixDocumentsCache {
    pub fn put_prefix_document(
        self,
        writer: &mut heed::RwTxn<MainT>,
        prefix: [u8; 4],
        index: usize,
        docid: DocumentId,
        highlights: &[Highlight],
    ) -> ZResult<()> {
        let key = PrefixKey::new(prefix, index as u64, docid.0);
        self.prefix_documents_cache.put(writer, &key, highlights)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.prefix_documents_cache.clear(writer)
    }

    pub fn prefix_documents<'txn>(
        self,
        reader: &'txn heed::RoTxn<MainT>,
        prefix: [u8; 4],
    ) -> ZResult<PrefixDocumentsIter<'txn>> {
        let start = PrefixKey::new(prefix, 0, 0);
        let end = PrefixKey::new(prefix, u64::max_value(), u32::max_value());
        let iter = self.prefix_documents_cache.range(reader, &(start..=end))?;
        Ok(PrefixDocumentsIter { iter })
    }
}

pub struct PrefixDocumentsIter<'txn> {
    iter: heed::RoRange<'txn, OwnedType<PrefixKey>, CowSlice<Highlight>>,
}

impl<'txn> Iterator for PrefixDocumentsIter<'txn> {
    type Item = ZResult<(DocumentId, Cow<'txn, [Highlight]>)>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(Ok((key, highlights))) => {
                let docid = DocumentId(key.docid.get());
                Some(Ok((docid, highlights)))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}
@@ -1,45 +0,0 @@
use std::borrow::Cow;

use heed::Result as ZResult;
use heed::types::OwnedType;
use sdset::{Set, SetBuf};
use slice_group_by::GroupBy;

use crate::database::MainT;
use crate::DocIndex;
use crate::store::{PostingsCodec, Postings};

#[derive(Copy, Clone)]
pub struct PrefixPostingsListsCache {
    pub(crate) prefix_postings_lists_cache: heed::Database<OwnedType<[u8; 4]>, PostingsCodec>,
}

impl PrefixPostingsListsCache {
    pub fn put_prefix_postings_list(
        self,
        writer: &mut heed::RwTxn<MainT>,
        prefix: [u8; 4],
        matches: &Set<DocIndex>,
    ) -> ZResult<()>
    {
        let docids = matches.linear_group_by_key(|m| m.document_id).map(|g| g[0].document_id).collect();
        let docids = Cow::Owned(SetBuf::new_unchecked(docids));
        let matches = Cow::Borrowed(matches);
        let postings = Postings { docids, matches };

        self.prefix_postings_lists_cache.put(writer, &prefix, &postings)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.prefix_postings_lists_cache.clear(writer)
    }

    pub fn prefix_postings_list<'txn>(
        self,
        reader: &'txn heed::RoTxn<MainT>,
        prefix: [u8; 4],
    ) -> ZResult<Option<Postings<'txn>>>
    {
        self.prefix_postings_lists_cache.get(reader, &prefix)
    }
}
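The cache keys every prefix as a fixed `[u8; 4]`, so one- and two-byte prefixes are zero-padded before lookup (the same packing appears later in `compute_short_prefixes`). A minimal sketch of that packing; the helper name is hypothetical and not part of the store above:

fn pack_prefix(word: &[u8], prefix_len: usize) -> [u8; 4] {
    // Copy the first `prefix_len` bytes of the word into a zero-padded 4-byte key.
    let mut prefix = [0u8; 4];
    let len = prefix_len.min(4).min(word.len());
    prefix[..len].copy_from_slice(&word[..len]);
    prefix
}

// e.g. pack_prefix(b"house", 2) == [b'h', b'o', 0, 0]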
@@ -1,44 +0,0 @@
use std::borrow::Cow;

use heed::Result as ZResult;
use heed::types::ByteSlice;

use crate::database::MainT;
use crate::{FstSetCow, MResult};

#[derive(Copy, Clone)]
pub struct Synonyms {
    pub(crate) synonyms: heed::Database<ByteSlice, ByteSlice>,
}

impl Synonyms {
    pub fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
    where A: AsRef<[u8]>,
    {
        let bytes = synonyms.as_fst().as_bytes();
        self.synonyms.put(writer, word, bytes)
    }

    pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
        self.synonyms.delete(writer, word)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
        self.synonyms.clear(writer)
    }

    pub(crate) fn synonyms_fst<'txn>(self, reader: &'txn heed::RoTxn<MainT>, word: &[u8]) -> ZResult<FstSetCow<'txn>> {
        match self.synonyms.get(reader, word)? {
            Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
            None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
        }
    }

    pub fn synonyms(self, reader: &heed::RoTxn<MainT>, word: &[u8]) -> MResult<Vec<String>> {
        let synonyms = self
            .synonyms_fst(&reader, word)?
            .stream()
            .into_strs()?;
        Ok(synonyms)
    }
}
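Synonyms are persisted as the raw bytes of an `fst::Set`, so callers build the set before storing it. A sketch of the round trip, assuming an `Index` value `index`, a write transaction `writer` and a read transaction `reader` on the main environment, and a surrounding function that lets `?` propagate errors:

// Build the set first; fst::Set::from_iter requires sorted input.
let mut alternatives = vec!["apartment", "flat"];
alternatives.sort_unstable();
let set = fst::Set::from_iter(alternatives)?;
index.synonyms.put_synonyms(&mut writer, b"home", &set)?;

// Read the alternatives back as plain strings.
let words = index.synonyms.synonyms(&reader, b"home")?;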
@@ -1,65 +0,0 @@
use super::BEU64;
use crate::database::UpdateT;
use crate::update::Update;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;

#[derive(Copy, Clone)]
pub struct Updates {
    pub(crate) updates: heed::Database<OwnedType<BEU64>, SerdeJson<Update>>,
}

impl Updates {
    // TODO do not trigger deserialize if possible
    pub fn last_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
        match self.updates.last(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    // TODO do not trigger deserialize if possible
    pub fn first_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
        match self.updates.first(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    // TODO do not trigger deserialize if possible
    pub fn get(self, reader: &heed::RoTxn<UpdateT>, update_id: u64) -> ZResult<Option<Update>> {
        let update_id = BEU64::new(update_id);
        self.updates.get(reader, &update_id)
    }

    pub fn put_update(
        self,
        writer: &mut heed::RwTxn<UpdateT>,
        update_id: u64,
        update: &Update,
    ) -> ZResult<()> {
        // TODO prefer using serde_json?
        let update_id = BEU64::new(update_id);
        self.updates.put(writer, &update_id, update)
    }

    pub fn del_update(self, writer: &mut heed::RwTxn<UpdateT>, update_id: u64) -> ZResult<bool> {
        let update_id = BEU64::new(update_id);
        self.updates.delete(writer, &update_id)
    }

    pub fn pop_front(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
        match self.first_update(writer)? {
            Some((update_id, update)) => {
                let key = BEU64::new(update_id);
                self.updates.delete(writer, &key)?;
                Ok(Some((update_id, update)))
            }
            None => Ok(None),
        }
    }

    pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
        self.updates.clear(writer)
    }
}
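The store behaves as a FIFO queue: ids are encoded big-endian so LMDB key order matches enqueue order, and `pop_front` removes the oldest entry. A sketch of a consumer loop, assuming an open write transaction `update_writer` on the update environment and a surrounding function that lets `?` propagate errors:

// Drain the queue in enqueue order; each iteration removes one update.
while let Some((update_id, update)) = index.updates.pop_front(&mut update_writer)? {
    // ... process `update`, then record a ProcessedUpdateResult under
    // `update_id` in index.updates_results so update_status() can report it.
}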
@@ -1,45 +0,0 @@
use super::BEU64;
use crate::database::UpdateT;
use crate::update::ProcessedUpdateResult;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;

#[derive(Copy, Clone)]
pub struct UpdatesResults {
    pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeJson<ProcessedUpdateResult>>,
}

impl UpdatesResults {
    pub fn last_update(
        self,
        reader: &heed::RoTxn<UpdateT>,
    ) -> ZResult<Option<(u64, ProcessedUpdateResult)>> {
        match self.updates_results.last(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    pub fn put_update_result(
        self,
        writer: &mut heed::RwTxn<UpdateT>,
        update_id: u64,
        update_result: &ProcessedUpdateResult,
    ) -> ZResult<()> {
        let update_id = BEU64::new(update_id);
        self.updates_results.put(writer, &update_id, update_result)
    }

    pub fn update_result(
        self,
        reader: &heed::RoTxn<UpdateT>,
        update_id: u64,
    ) -> ZResult<Option<ProcessedUpdateResult>> {
        let update_id = BEU64::new(update_id);
        self.updates_results.get(reader, &update_id)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
        self.updates_results.clear(writer)
    }
}
@@ -1,34 +0,0 @@
use crate::database::{MainT, UpdateT};
use crate::update::{next_update_id, Update};
use crate::{store, MResult, RankedMap};

pub fn apply_clear_all(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
) -> MResult<()> {
    index.main.put_words_fst(writer, &fst::Set::default())?;
    index.main.put_external_docids(writer, &fst::Map::default())?;
    index.main.put_internal_docids(writer, &sdset::SetBuf::default())?;
    index.main.put_ranked_map(writer, &RankedMap::default())?;
    index.main.put_number_of_documents(writer, |_| 0)?;
    index.documents_fields.clear(writer)?;
    index.documents_fields_counts.clear(writer)?;
    index.postings_lists.clear(writer)?;
    index.docs_words.clear(writer)?;
    index.prefix_documents_cache.clear(writer)?;
    index.prefix_postings_lists_cache.clear(writer)?;

    Ok(())
}

pub fn push_clear_all(
    writer: &mut heed::RwTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
    let update = Update::clear_all();
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}
@@ -1,26 +0,0 @@
use crate::database::{MainT, UpdateT};
use crate::{store, MResult};
use crate::update::{next_update_id, Update};

pub fn apply_customs_update(
    writer: &mut heed::RwTxn<MainT>,
    main_store: store::Main,
    customs: &[u8],
) -> MResult<()> {
    main_store.put_customs(writer, customs)
}

pub fn push_customs_update(
    writer: &mut heed::RwTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    customs: Vec<u8>,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::customs(customs);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}
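Like every update kind here, customs follow a push/apply split: `push_customs_update` only enqueues the payload under the next update id, while `apply_customs_update` later writes it to the main store when the queue is processed. A sketch of both halves, assuming write transactions `update_writer` and `main_writer` are open on the update and main environments and that `?` propagates into a surrounding function:

// Enqueue: allocate an id and persist the update in the update store.
let update_id = push_customs_update(
    &mut update_writer,
    index.updates,
    index.updates_results,
    b"opaque user payload".to_vec(),
)?;

// Later, the update loop dequeues update `update_id` and applies it.
apply_customs_update(&mut main_writer, index.main, b"opaque user payload")?;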
@@ -1,426 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use meilisearch_types::DocumentId;
|
||||
use sdset::{duo::Union, SetOperation};
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::Deserializer;
|
||||
use crate::store::{self, DocumentsFields, DocumentsFieldsCounts, DiscoverIds};
|
||||
use crate::update::helpers::{index_value, value_to_number, extract_document_id};
|
||||
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
||||
use crate::{Error, MResult, RankedMap};
|
||||
|
||||
pub struct DocumentsAddition<D> {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
documents: Vec<D>,
|
||||
is_partial: bool,
|
||||
}
|
||||
|
||||
impl<D> DocumentsAddition<D> {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_partial(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_document(&mut self, document: D) {
|
||||
self.documents.push(document);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64>
|
||||
where
|
||||
D: serde::Serialize,
|
||||
{
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_addition(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.documents,
|
||||
self.is_partial,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<D> Extend<D> for DocumentsAddition<D> {
|
||||
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
|
||||
self.documents.extend(iter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_documents_addition<D: serde::Serialize>(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: Vec<D>,
|
||||
is_partial: bool,
|
||||
) -> MResult<u64> {
|
||||
let mut values = Vec::with_capacity(addition.len());
|
||||
for add in addition {
|
||||
let vec = serde_json::to_vec(&add)?;
|
||||
let add = serde_json::from_slice(&vec)?;
|
||||
values.push(add);
|
||||
}
|
||||
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = if is_partial {
|
||||
Update::documents_partial(values)
|
||||
} else {
|
||||
Update::documents_addition(values)
|
||||
};
|
||||
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn index_document<A>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
documents_fields: DocumentsFields,
|
||||
documents_fields_counts: DocumentsFieldsCounts,
|
||||
ranked_map: &mut RankedMap,
|
||||
indexer: &mut RawIndexer<A>,
|
||||
schema: &Schema,
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
value: &Value,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let serialized = serde_json::to_vec(value)?;
|
||||
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
||||
let number_of_words = index_value(indexer, document_id, *indexed_pos, value);
|
||||
if let Some(number_of_words) = number_of_words {
|
||||
documents_fields_counts.put_document_field_count(
|
||||
writer,
|
||||
document_id,
|
||||
*indexed_pos,
|
||||
number_of_words as u16,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
if schema.is_ranked(field_id) {
|
||||
let number = value_to_number(value).unwrap_or_default();
|
||||
ranked_map.insert(document_id, field_id, number);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
partial: bool
|
||||
) -> MResult<()>
|
||||
{
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
// Retrieve the documents ids related structures
|
||||
let external_docids = index.main.external_docids(writer)?;
|
||||
let internal_docids = index.main.internal_docids(writer)?;
|
||||
let mut available_ids = DiscoverIds::new(&internal_docids);
|
||||
|
||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
let mut documents_additions = HashMap::new();
|
||||
let mut new_external_docids = BTreeMap::new();
|
||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||
|
||||
for mut document in new_documents {
|
||||
let external_docids_get = |docid: &str| {
|
||||
match (external_docids.get(docid), new_external_docids.get(docid)) {
|
||||
(_, Some(&id))
|
||||
| (Some(id), _) => Some(id as u32),
|
||||
(None, None) => None,
|
||||
}
|
||||
};
|
||||
|
||||
let (internal_docid, external_docid) =
|
||||
extract_document_id(
|
||||
&primary_key,
|
||||
&document,
|
||||
&external_docids_get,
|
||||
&mut available_ids,
|
||||
)?;
|
||||
|
||||
new_external_docids.insert(external_docid, internal_docid.0 as u64);
|
||||
new_internal_docids.push(internal_docid);
|
||||
|
||||
if partial {
|
||||
let mut deserializer = Deserializer {
|
||||
document_id: internal_docid,
|
||||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
fields: None,
|
||||
};
|
||||
|
||||
let old_document = Option::<HashMap<String, Value>>::deserialize(&mut deserializer)?;
|
||||
if let Some(old_document) = old_document {
|
||||
for (key, value) in old_document {
|
||||
document.entry(key).or_insert(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
documents_additions.insert(internal_docid, document);
|
||||
}
|
||||
|
||||
// 2. remove the documents postings lists
|
||||
let number_of_inserted_documents = documents_additions.len();
|
||||
let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect();
|
||||
apply_documents_deletion(writer, index, documents_ids)?;
|
||||
|
||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
// For each document in this update
|
||||
for (document_id, document) in &documents_additions {
|
||||
// For each key-value pair in the document.
|
||||
for (attribute, value) in document {
|
||||
let field_id = schema.insert_and_index(&attribute)?;
|
||||
index_document(
|
||||
writer,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
&mut ranked_map,
|
||||
&mut indexer,
|
||||
&schema,
|
||||
field_id,
|
||||
*document_id,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
|
||||
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
||||
index.main.merge_external_docids(writer, &new_external_docids)?;
|
||||
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
||||
|
||||
// recompute all facet attributes after document update.
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let docids = index.main.internal_docids(writer)?;
|
||||
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, true)
|
||||
}
|
||||
|
||||
pub fn apply_documents_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, false)
|
||||
}
|
||||
|
||||
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||
let schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let mut ranked_map = RankedMap::default();
|
||||
|
||||
// 1. retrieve all documents ids
|
||||
let mut documents_ids_to_reindex = Vec::new();
|
||||
for result in index.documents_fields_counts.documents_ids(writer)? {
|
||||
let document_id = result?;
|
||||
documents_ids_to_reindex.push(document_id);
|
||||
}
|
||||
|
||||
// 2. remove the documents posting lists
|
||||
index.main.put_words_fst(writer, &fst::Set::default())?;
|
||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||
index.facets.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
|
||||
let stop_words = index.main
|
||||
.stop_words_fst(writer)?
|
||||
.map_data(Cow::into_owned)
|
||||
.unwrap();
|
||||
|
||||
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut ram_store = HashMap::new();
|
||||
|
||||
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
|
||||
for document_id in &documents_ids_to_reindex {
|
||||
for result in index.documents_fields.document_fields(writer, *document_id)? {
|
||||
let (field_id, bytes) = result?;
|
||||
let value: Value = serde_json::from_slice(bytes)?;
|
||||
ram_store.insert((document_id, field_id), value);
|
||||
}
|
||||
|
||||
// For each key-value pair in the document.
|
||||
for ((document_id, field_id), value) in ram_store.drain() {
|
||||
index_document(
|
||||
writer,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
&mut ranked_map,
|
||||
&mut indexer,
|
||||
&schema,
|
||||
field_id,
|
||||
*document_id,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. write the new index in the main store
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
// recompute all facet attributes after document update.
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let docids = index.main.internal_docids(writer)?;
|
||||
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_documents_addition_index<A>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer<A>,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let indexed = indexer.build();
|
||||
let mut delta_words_builder = SetBuilder::memory();
|
||||
|
||||
for (word, delta_set) in indexed.words_doc_indexes {
|
||||
delta_words_builder.insert(&word).unwrap();
|
||||
|
||||
let set = match index.postings_lists.postings_list(writer, &word)? {
|
||||
Some(postings) => Union::new(&postings.matches, &delta_set).into_set_buf(),
|
||||
None => delta_set,
|
||||
};
|
||||
|
||||
index.postings_lists.put_postings_list(writer, &word, &set)?;
|
||||
}
|
||||
|
||||
for (id, words) in indexed.docs_words {
|
||||
index.docs_words.put_doc_words(writer, id, &words)?;
|
||||
}
|
||||
|
||||
let delta_words = delta_words_builder.into_set();
|
||||
|
||||
let words_fst = index.main.words_fst(writer)?;
|
||||
let words = if !words_fst.is_empty() {
|
||||
let op = OpBuilder::new()
|
||||
.add(words_fst.stream())
|
||||
.add(delta_words.stream())
|
||||
.r#union();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
} else {
|
||||
delta_words
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
index.main.put_ranked_map(writer, ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
||||
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,207 +0,0 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
use fst::{SetBuilder, Streamer};
|
||||
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::store;
|
||||
use crate::update::{next_update_id, compute_short_prefixes, Update};
|
||||
use crate::{DocumentId, Error, MResult, RankedMap, MainWriter, Index};
|
||||
|
||||
pub struct DocumentsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
external_docids: Vec<String>,
|
||||
}
|
||||
|
||||
impl DocumentsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsDeletion {
|
||||
DocumentsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
external_docids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
|
||||
self.external_docids.push(document_id);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.external_docids,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl Extend<String> for DocumentsDeletion {
|
||||
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
|
||||
self.external_docids.extend(iter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_documents_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::documents_deletion(external_docids);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_documents_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let (external_docids, internal_docids) = {
|
||||
let new_external_docids = SetBuf::from_dirty(external_docids);
|
||||
let mut internal_docids = Vec::new();
|
||||
|
||||
let old_external_docids = index.main.external_docids(writer)?;
|
||||
for external_docid in new_external_docids.as_slice() {
|
||||
if let Some(id) = old_external_docids.get(external_docid) {
|
||||
internal_docids.push(DocumentId(id as u32));
|
||||
}
|
||||
}
|
||||
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.into_iter().map(|k| (k, 0))).unwrap();
|
||||
(new_external_docids, SetBuf::from_dirty(internal_docids))
|
||||
};
|
||||
|
||||
let schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
// facet filters deletion
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &internal_docids, &attributes_for_facetting)?;
|
||||
index.facets.remove(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// collect the ranked attributes according to the schema
|
||||
let ranked_fields = schema.ranked();
|
||||
|
||||
let mut words_document_ids = HashMap::new();
|
||||
for id in internal_docids.iter().cloned() {
|
||||
// remove all the ranked attributes from the ranked_map
|
||||
for ranked_attr in ranked_fields {
|
||||
ranked_map.remove(id, *ranked_attr);
|
||||
}
|
||||
|
||||
let words = index.docs_words.doc_words(writer, id)?;
|
||||
if !words.is_empty() {
|
||||
let mut stream = words.stream();
|
||||
while let Some(word) = stream.next() {
|
||||
let word = word.to_vec();
|
||||
words_document_ids
|
||||
.entry(word)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut deleted_documents = HashSet::new();
|
||||
let mut removed_words = BTreeSet::new();
|
||||
for (word, document_ids) in words_document_ids {
|
||||
let document_ids = SetBuf::from_dirty(document_ids);
|
||||
|
||||
if let Some(postings) = index.postings_lists.postings_list(writer, &word)? {
|
||||
let op = DifferenceByKey::new(&postings.matches, &document_ids, |d| d.document_id, |id| *id);
|
||||
let doc_indexes = op.into_set_buf();
|
||||
|
||||
if !doc_indexes.is_empty() {
|
||||
index.postings_lists.put_postings_list(writer, &word, &doc_indexes)?;
|
||||
} else {
|
||||
index.postings_lists.del_postings_list(writer, &word)?;
|
||||
removed_words.insert(word);
|
||||
}
|
||||
}
|
||||
|
||||
for id in document_ids {
|
||||
index.documents_fields_counts.del_all_document_fields_counts(writer, id)?;
|
||||
if index.documents_fields.del_all_document_fields(writer, id)? != 0 {
|
||||
deleted_documents.insert(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let deleted_documents_len = deleted_documents.len() as u64;
|
||||
for id in &deleted_documents {
|
||||
index.docs_words.del_doc_words(writer, *id)?;
|
||||
}
|
||||
|
||||
let removed_words = fst::Set::from_iter(removed_words).unwrap();
|
||||
let words = {
|
||||
let words_set = index.main.words_fst(writer)?;
|
||||
let op = fst::set::OpBuilder::new()
|
||||
.add(words_set.stream())
|
||||
.add(removed_words.stream())
|
||||
.difference();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
|
||||
|
||||
// We apply the changes to the user and internal ids
|
||||
index.main.remove_external_docids(writer, &external_docids)?;
|
||||
index.main.remove_internal_docids(writer, &internal_docids)?;
|
||||
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
document_cache_remove_deleted(writer, index, &ranked_map, &deleted_documents)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// rebuilds the document id cache by either removing deleted documents from the existing cache,
|
||||
/// and generating a new one from docs in store
|
||||
fn document_cache_remove_deleted(writer: &mut MainWriter, index: &Index, ranked_map: &RankedMap, documents_to_delete: &HashSet<DocumentId>) -> MResult<()> {
|
||||
let new_cache = match index.main.sorted_document_ids_cache(writer)? {
|
||||
// only keep documents that are not in the list of deleted documents. Order is preserved,
|
||||
// no need to resort
|
||||
Some(old_cache) => {
|
||||
old_cache.iter().filter(|docid| !documents_to_delete.contains(docid)).cloned().collect::<Vec<_>>()
|
||||
}
|
||||
// couldn't find cached documents, try building a new cache from documents in store
|
||||
None => {
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, ranked_map, index, &mut document_ids)?;
|
||||
document_ids
|
||||
}
|
||||
};
|
||||
index.main.put_sorted_document_ids_cache(writer, &new_cache)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
use std::fmt::Write as _;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_types::DocumentId;
|
||||
use ordered_float::OrderedFloat;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::Number;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::SerializerError;
|
||||
use crate::store::DiscoverIds;
|
||||
|
||||
/// Returns the number of words indexed or `None` if the type is unindexable.
|
||||
pub fn index_value<A>(
|
||||
indexer: &mut RawIndexer<A>,
|
||||
document_id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
value: &Value,
|
||||
) -> Option<usize>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(boolean) => {
|
||||
let text = boolean.to_string();
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::Number(number) => {
|
||||
let text = number.to_string();
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
Value::String(string) => {
|
||||
Some(indexer.index_text(document_id, indexed_pos, &string))
|
||||
},
|
||||
Value::Array(_) => {
|
||||
let text = value_to_string(value);
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
Value::Object(_) => {
|
||||
let text = value_to_string(value);
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Transforms the JSON Value type into a String.
|
||||
pub fn value_to_string(value: &Value) -> String {
|
||||
fn internal_value_to_string(string: &mut String, value: &Value) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); },
|
||||
Value::Number(number) => { let _ = write!(string, "{}", &number); },
|
||||
Value::String(text) => string.push_str(&text),
|
||||
Value::Array(array) => {
|
||||
for value in array {
|
||||
internal_value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
Value::Object(object) => {
|
||||
for (key, value) in object {
|
||||
string.push_str(key);
|
||||
let _ = string.write_str(". ");
|
||||
internal_value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
let mut string = String::new();
|
||||
internal_value_to_string(&mut string, value);
|
||||
string
|
||||
}
|
||||
|
||||
/// Transforms the JSON Value type into a Number.
|
||||
pub fn value_to_number(value: &Value) -> Option<Number> {
|
||||
use std::str::FromStr;
|
||||
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)),
|
||||
Value::Number(number) => {
|
||||
match (number.as_i64(), number.as_u64(), number.as_f64()) {
|
||||
(Some(n), _, _) => Some(Number::Signed(n)),
|
||||
(_, Some(n), _) => Some(Number::Unsigned(n)),
|
||||
(_, _, Some(n)) => Some(Number::Float(OrderedFloat(n))),
|
||||
(None, None, None) => None,
|
||||
}
|
||||
},
|
||||
Value::String(string) => Number::from_str(string).ok(),
|
||||
Value::Array(_array) => None,
|
||||
Value::Object(_object) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Validates a string representation to be a correct document id and returns
|
||||
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
||||
pub fn discover_document_id<F>(
|
||||
docid: &str,
|
||||
external_docids_get: F,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<DocumentId, SerializerError>
|
||||
where
|
||||
F: FnOnce(&str) -> Option<u32>
|
||||
{
|
||||
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
match external_docids_get(docid) {
|
||||
Some(id) => Ok(DocumentId(id)),
|
||||
None => {
|
||||
let internal_id = available_docids.next().expect("no more ids available");
|
||||
Ok(internal_id)
|
||||
},
|
||||
}
|
||||
} else {
|
||||
Err(SerializerError::InvalidDocumentIdFormat)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts and validates the document id of a document.
|
||||
pub fn extract_document_id<F>(
|
||||
primary_key: &str,
|
||||
document: &IndexMap<String, Value>,
|
||||
external_docids_get: F,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<(DocumentId, String), SerializerError>
|
||||
where
|
||||
F: FnOnce(&str) -> Option<u32>
|
||||
{
|
||||
match document.get(primary_key) {
|
||||
Some(value) => {
|
||||
let docid = match value {
|
||||
Value::Number(number) => number.to_string(),
|
||||
Value::String(string) => string.clone(),
|
||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||
};
|
||||
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
|
||||
}
|
||||
None => Err(SerializerError::DocumentIdNotFound),
|
||||
}
|
||||
}
|
||||
@@ -1,384 +0,0 @@
|
||||
mod clear_all;
|
||||
mod customs_update;
|
||||
mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod settings_update;
|
||||
mod helpers;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||
pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::helpers::{index_value, value_to_string, value_to_number, discover_document_id, extract_document_id};
|
||||
pub use self::settings_update::{apply_settings_update, push_settings_update};
|
||||
|
||||
use std::cmp;
|
||||
use std::time::Instant;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use heed::Result as ZResult;
|
||||
use indexmap::IndexMap;
|
||||
use log::debug;
|
||||
use sdset::Set;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use meilisearch_error::ErrorCode;
|
||||
use meilisearch_types::DocumentId;
|
||||
|
||||
use crate::{store, MResult, RankedMap};
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::settings::SettingsUpdate;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Update {
|
||||
data: UpdateData,
|
||||
enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Update {
|
||||
fn clear_all() -> Update {
|
||||
Update {
|
||||
data: UpdateData::ClearAll,
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn customs(data: Vec<u8>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Customs(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_addition(documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsAddition(documents),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_partial(documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsPartial(documents),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_deletion(data: Vec<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsDeletion(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn settings(data: SettingsUpdate) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Settings(Box::new(data)),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateData {
|
||||
ClearAll,
|
||||
Customs(Vec<u8>),
|
||||
DocumentsAddition(Vec<IndexMap<String, Value>>),
|
||||
DocumentsPartial(Vec<IndexMap<String, Value>>),
|
||||
DocumentsDeletion(Vec<String>),
|
||||
Settings(Box<SettingsUpdate>)
|
||||
}
|
||||
|
||||
impl UpdateData {
|
||||
pub fn update_type(&self) -> UpdateType {
|
||||
match self {
|
||||
UpdateData::ClearAll => UpdateType::ClearAll,
|
||||
UpdateData::Customs(_) => UpdateType::Customs,
|
||||
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::Settings(update) => UpdateType::Settings {
|
||||
settings: update.clone(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "name")]
|
||||
pub enum UpdateType {
|
||||
ClearAll,
|
||||
Customs,
|
||||
DocumentsAddition { number: usize },
|
||||
DocumentsPartial { number: usize },
|
||||
DocumentsDeletion { number: usize },
|
||||
Settings { settings: Box<SettingsUpdate> },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ProcessedUpdateResult {
|
||||
pub update_id: u64,
|
||||
#[serde(rename = "type")]
|
||||
pub update_type: UpdateType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error_type: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error_code: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error_link: Option<String>,
|
||||
pub duration: f64, // in seconds
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct EnqueuedUpdateResult {
|
||||
pub update_id: u64,
|
||||
#[serde(rename = "type")]
|
||||
pub update_type: UpdateType,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", tag = "status")]
|
||||
pub enum UpdateStatus {
|
||||
Enqueued {
|
||||
#[serde(flatten)]
|
||||
content: EnqueuedUpdateResult,
|
||||
},
|
||||
Failed {
|
||||
#[serde(flatten)]
|
||||
content: ProcessedUpdateResult,
|
||||
},
|
||||
Processed {
|
||||
#[serde(flatten)]
|
||||
content: ProcessedUpdateResult,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn update_status(
|
||||
update_reader: &heed::RoTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
update_id: u64,
|
||||
) -> MResult<Option<UpdateStatus>> {
|
||||
match updates_results_store.update_result(update_reader, update_id)? {
|
||||
Some(result) => {
|
||||
if result.error.is_some() {
|
||||
Ok(Some(UpdateStatus::Failed { content: result }))
|
||||
} else {
|
||||
Ok(Some(UpdateStatus::Processed { content: result }))
|
||||
}
|
||||
},
|
||||
None => match updates_store.get(update_reader, update_id)? {
|
||||
Some(update) => Ok(Some(UpdateStatus::Enqueued {
|
||||
content: EnqueuedUpdateResult {
|
||||
update_id,
|
||||
update_type: update.data.update_type(),
|
||||
enqueued_at: update.enqueued_at,
|
||||
},
|
||||
})),
|
||||
None => Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_update_id(
|
||||
update_writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
) -> ZResult<u64> {
|
||||
let last_update = updates_store.last_update(update_writer)?;
|
||||
let last_update = last_update.map(|(n, _)| n);
|
||||
|
||||
let last_update_results_id = updates_results_store.last_update(update_writer)?;
|
||||
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
|
||||
|
||||
let max_update_id = cmp::max(last_update, last_update_results_id);
|
||||
let new_update_id = max_update_id.map_or(0, |n| n + 1);
|
||||
|
||||
Ok(new_update_id)
|
||||
}
|
||||
|
||||
pub fn update_task<'a, 'b>(
    writer: &'a mut heed::RwTxn<'b, MainT>,
    index: &store::Index,
    update_id: u64,
    update: Update,
) -> MResult<ProcessedUpdateResult> {
    debug!("Processing update number {}", update_id);

    let Update { enqueued_at, data } = update;

    let (update_type, result, duration) = match data {
        UpdateData::ClearAll => {
            let start = Instant::now();

            let update_type = UpdateType::ClearAll;
            let result = apply_clear_all(writer, index);

            (update_type, result, start.elapsed())
        }
        UpdateData::Customs(customs) => {
            let start = Instant::now();

            let update_type = UpdateType::Customs;
            let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsAddition(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsAddition {
                number: documents.len(),
            };

            let result = apply_documents_addition(writer, index, documents);

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsPartial(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsPartial {
                number: documents.len(),
            };

            let result = apply_documents_partial_addition(writer, index, documents);

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsDeletion(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsDeletion {
                number: documents.len(),
            };

            let result = apply_documents_deletion(writer, index, documents);

            (update_type, result, start.elapsed())
        }
        UpdateData::Settings(settings) => {
            let start = Instant::now();

            let update_type = UpdateType::Settings {
                settings: settings.clone(),
            };

            let result = apply_settings_update(
                writer,
                index,
                *settings,
            );

            (update_type, result, start.elapsed())
        }
    };

    debug!(
        "Processed update number {} {:?} {:?}",
        update_id, update_type, result
    );

    let status = ProcessedUpdateResult {
        update_id,
        update_type,
        error: result.as_ref().map_err(|e| e.to_string()).err(),
        error_code: result.as_ref().map_err(|e| e.error_name()).err(),
        error_type: result.as_ref().map_err(|e| e.error_type()).err(),
        error_link: result.as_ref().map_err(|e| e.error_url()).err(),
        duration: duration.as_secs_f64(),
        enqueued_at,
        processed_at: Utc::now(),
    };

    Ok(status)
}

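/// Rebuilds the prefix postings-lists cache for prefixes of length 1 and 2 by
/// streaming the words FST and merging the postings lists of all words that
/// share the same prefix.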
fn compute_short_prefixes<A>(
    writer: &mut heed::RwTxn<MainT>,
    words_fst: &fst::Set<A>,
    index: &store::Index,
) -> MResult<()>
where A: AsRef<[u8]>,
{
    // clear the prefixes
    let pplc_store = index.prefix_postings_lists_cache;
    pplc_store.clear(writer)?;

    for prefix_len in 1..=2 {
        // compute prefixes and store those in the PrefixPostingsListsCache store.
        let mut previous_prefix: Option<([u8; 4], Vec<_>)> = None;
        let mut stream = words_fst.into_stream();
        while let Some(input) = stream.next() {
            // We skip the prefixes that are shorter than the current length
            // we want to cache (<). We must ignore the input when it is exactly the
            // same word as the prefix because if we match exactly on it we need
            // to consider it as an exact match and not as a prefix (=).
            if input.len() <= prefix_len { continue }

            if let Some(postings_list) = index.postings_lists.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
                let prefix = &input[..prefix_len];

                let mut arr_prefix = [0; 4];
                arr_prefix[..prefix_len].copy_from_slice(prefix);

                match previous_prefix {
                    Some((ref mut prev_prefix, ref mut prev_pl)) if *prev_prefix != arr_prefix => {
                        prev_pl.sort_unstable();
                        prev_pl.dedup();

                        if let Ok(prefix) = std::str::from_utf8(&prev_prefix[..prefix_len]) {
                            debug!("writing the prefix of {:?} of length {}", prefix, prev_pl.len());
                        }

                        let pls = Set::new_unchecked(&prev_pl);
                        pplc_store.put_prefix_postings_list(writer, *prev_prefix, &pls)?;

                        *prev_prefix = arr_prefix;
                        prev_pl.clear();
                        prev_pl.extend_from_slice(&postings_list);
                    },
                    Some((_, ref mut prev_pl)) => prev_pl.extend_from_slice(&postings_list),
                    None => previous_prefix = Some((arr_prefix, postings_list.to_vec())),
                }
            }
        }

        // write the last prefix postings lists
        if let Some((prev_prefix, mut prev_pl)) = previous_prefix.take() {
            prev_pl.sort_unstable();
            prev_pl.dedup();

            let pls = Set::new_unchecked(&prev_pl);
            pplc_store.put_prefix_postings_list(writer, prev_prefix, &pls)?;
        }
    }

    Ok(())
}

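/// Sorts the document ids with the placeholder criteria and caches the sorted
/// list in the main store.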
fn cache_document_ids_sorted(
    writer: &mut heed::RwTxn<MainT>,
    ranked_map: &RankedMap,
    index: &store::Index,
    document_ids: &mut [DocumentId],
) -> MResult<()> {
    crate::bucket_sort::placeholder_document_sort(document_ids, index, writer, ranked_map)?;
    index.main.put_sorted_document_ids_cache(writer, &document_ids)
}

@@ -1,313 +0,0 @@
use std::collections::{BTreeMap, BTreeSet};

use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
use meilisearch_schema::Schema;

use crate::database::{MainT, UpdateT};
use crate::settings::{UpdateState, SettingsUpdate, RankingRule};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult, Error};

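/// Enqueues a settings update: reserves the next update id and stores the
/// update in the updates store under that id.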
pub fn push_settings_update(
    writer: &mut heed::RwTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    settings: SettingsUpdate,
) -> ZResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::settings(settings);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

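/// Applies a settings update to the schema and the index stores; changes that
/// affect indexing (ranking rules, searchable attributes, faceted attributes,
/// removed stop words) trigger a full reindex of the documents.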
pub fn apply_settings_update(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    settings: SettingsUpdate,
) -> MResult<()> {
    let mut must_reindex = false;

    let mut schema = match index.main.schema(writer)? {
        Some(schema) => schema,
        None => {
            match settings.primary_key.clone() {
                UpdateState::Update(id) => Schema::with_primary_key(&id),
                _ => return Err(Error::MissingPrimaryKey)
            }
        }
    };

    match settings.ranking_rules {
        UpdateState::Update(v) => {
            let ranked_field: Vec<&str> = v.iter().filter_map(RankingRule::field).collect();
            schema.update_ranked(&ranked_field)?;
            index.main.put_ranking_rules(writer, &v)?;
            must_reindex = true;
        },
        UpdateState::Clear => {
            index.main.delete_ranking_rules(writer)?;
            schema.clear_ranked();
            must_reindex = true;
        },
        UpdateState::Nothing => (),
    }

    match settings.distinct_attribute {
        UpdateState::Update(v) => {
            let field_id = schema.insert(&v)?;
            index.main.put_distinct_attribute(writer, field_id)?;
        },
        UpdateState::Clear => {
            index.main.delete_distinct_attribute(writer)?;
        },
        UpdateState::Nothing => (),
    }

    match settings.searchable_attributes.clone() {
        UpdateState::Update(v) => {
            if v.iter().any(|e| e == "*") || v.is_empty() {
                schema.set_all_fields_as_indexed();
            } else {
                schema.update_indexed(v)?;
            }
            must_reindex = true;
        },
        UpdateState::Clear => {
            schema.set_all_fields_as_indexed();
            must_reindex = true;
        },
        UpdateState::Nothing => (),
    }
    match settings.displayed_attributes.clone() {
        UpdateState::Update(v) => {
            if v.contains("*") || v.is_empty() {
                schema.set_all_fields_as_displayed();
            } else {
                schema.update_displayed(v)?
            }
        },
        UpdateState::Clear => {
            schema.set_all_fields_as_displayed();
        },
        UpdateState::Nothing => (),
    }

    match settings.attributes_for_faceting {
        UpdateState::Update(attrs) => {
            apply_attributes_for_faceting_update(writer, index, &mut schema, &attrs)?;
            must_reindex = true;
        },
        UpdateState::Clear => {
            index.main.delete_attributes_for_faceting(writer)?;
            index.facets.clear(writer)?;
        },
        UpdateState::Nothing => (),
    }

    index.main.put_schema(writer, &schema)?;

    match settings.stop_words {
        UpdateState::Update(stop_words) => {
            if apply_stop_words_update(writer, index, stop_words)? {
                must_reindex = true;
            }
        },
        UpdateState::Clear => {
            if apply_stop_words_update(writer, index, BTreeSet::new())? {
                must_reindex = true;
            }
        },
        UpdateState::Nothing => (),
    }

    match settings.synonyms {
        UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?,
        UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
        UpdateState::Nothing => (),
    }

    if must_reindex {
        reindex_all_documents(writer, index)?;
    }

    Ok(())
}

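/// Inserts the faceted attributes into the schema and stores their field ids
/// in the main store.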
fn apply_attributes_for_faceting_update(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    schema: &mut Schema,
    attributes: &[String],
) -> MResult<()> {
    let mut attribute_ids = Vec::new();
    for name in attributes {
        attribute_ids.push(schema.insert(name)?);
    }
    let attributes_for_faceting = SetBuf::from_dirty(attribute_ids);
    index.main.put_attributes_for_faceting(writer, &attributes_for_faceting)?;
    Ok(())
}

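/// Replaces the stop word list and updates the words FST accordingly; returns
/// `true` when stop words were removed, since that case requires a reindex.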
pub fn apply_stop_words_update(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    stop_words: BTreeSet<String>,
) -> MResult<bool> {
    let mut must_reindex = false;

    let old_stop_words: BTreeSet<String> = index.main
        .stop_words_fst(writer)?
        .stream()
        .into_strs()?
        .into_iter()
        .collect();

    let deletion: BTreeSet<String> = old_stop_words.difference(&stop_words).cloned().collect();
    let addition: BTreeSet<String> = stop_words.difference(&old_stop_words).cloned().collect();

    if !addition.is_empty() {
        apply_stop_words_addition(writer, index, addition)?;
    }

    if !deletion.is_empty() {
        must_reindex = true;
        apply_stop_words_deletion(writer, index, deletion)?;
    }

    let words_fst = index.main.words_fst(writer)?;
    if !words_fst.is_empty() {
        let stop_words = fst::Set::from_iter(stop_words)?;
        let op = OpBuilder::new()
            .add(&words_fst)
            .add(&stop_words)
            .difference();

        let mut builder = fst::SetBuilder::memory();
        builder.extend_stream(op)?;
        let words_fst = builder.into_set();

        index.main.put_words_fst(writer, &words_fst)?;
        index.main.put_stop_words_fst(writer, &stop_words)?;
    }

    Ok(must_reindex)
}

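/// Adds new stop words: deletes their postings lists, removes them from the
/// words FST and merges them into the stop words FST.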
fn apply_stop_words_addition(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    addition: BTreeSet<String>,
) -> MResult<()> {
    let main_store = index.main;
    let postings_lists_store = index.postings_lists;

    let mut stop_words_builder = SetBuilder::memory();

    for word in addition {
        stop_words_builder.insert(&word)?;
        // we remove every posting list associated to a new stop word
        postings_lists_store.del_postings_list(writer, word.as_bytes())?;
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder.into_set();

    // we also need to remove all the stop words from the main fst
    let words_fst = main_store.words_fst(writer)?;
    if !words_fst.is_empty() {
        let op = OpBuilder::new()
            .add(&words_fst)
            .add(&delta_stop_words)
            .difference();

        let mut word_fst_builder = SetBuilder::memory();
        word_fst_builder.extend_stream(op)?;
        let word_fst = word_fst_builder.into_set();

        main_store.put_words_fst(writer, &word_fst)?;
    }

    // now we add all of these stop words to the stop words fst of the main store
    let stop_words_fst = main_store.stop_words_fst(writer)?;

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .r#union();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op)?;
    let stop_words_fst = stop_words_builder.into_set();

    main_store.put_stop_words_fst(writer, &stop_words_fst)?;

    Ok(())
}

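/// Removes the given words from the stop words FST.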
fn apply_stop_words_deletion(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    deletion: BTreeSet<String>,
) -> MResult<()> {
    let mut stop_words_builder = SetBuilder::memory();

    for word in deletion {
        stop_words_builder.insert(&word)?;
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder.into_set();

    // now we delete all of these stop words from the main store
    let stop_words_fst = index.main.stop_words_fst(writer)?;

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .difference();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op)?;
    let stop_words_fst = stop_words_builder.into_set();

    Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
}

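/// Clears the synonyms store, rewrites every synonym entry and rebuilds the
/// synonyms FST in the main store.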
pub fn apply_synonyms_update(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    synonyms: BTreeMap<String, Vec<String>>,
) -> MResult<()> {
    let main_store = index.main;
    let synonyms_store = index.synonyms;

    let mut synonyms_builder = SetBuilder::memory();
    synonyms_store.clear(writer)?;
    for (word, alternatives) in synonyms.clone() {
        synonyms_builder.insert(&word)?;

        let alternatives = {
            let alternatives = SetBuf::from_dirty(alternatives);
            let mut alternatives_builder = SetBuilder::memory();
            alternatives_builder.extend_iter(alternatives)?;
            alternatives_builder.into_set()
        };

        synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
    }

    let synonyms_set = synonyms_builder.into_set();

    main_store.put_synonyms_fst(writer, &synonyms_set)?;

    Ok(())
}

@@ -1,8 +1,16 @@
[package]
name = "meilisearch-error"
version = "0.14.1"
version = "0.25.2"
authors = ["marin <postma.marin@protonmail.com>"]
edition = "2018"

[dependencies]
actix-http = "1.0.1"
actix-http = "=3.0.0-beta.10"
actix-web = "4.0.0-beta.9"
proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true }
serde = { version = "1.0.130", features = ["derive"] }
serde_json = "1.0.69"

[features]
test-traits = ["proptest", "proptest-derive"]

@@ -1,6 +1,75 @@
use std::fmt;

use actix_http::http::StatusCode;
use actix_http::{body::Body, http::StatusCode};
use actix_web::{self as aweb, HttpResponseBuilder};
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))]
pub struct ResponseError {
    #[serde(skip)]
    #[cfg_attr(
        feature = "test-traits",
        proptest(strategy = "strategy::status_code_strategy()")
    )]
    code: StatusCode,
    message: String,
    #[serde(rename = "code")]
    error_code: String,
    #[serde(rename = "type")]
    error_type: String,
    #[serde(rename = "link")]
    error_link: String,
}

impl ResponseError {
    pub fn from_msg(message: String, code: Code) -> Self {
        Self {
            code: code.http(),
            message,
            error_code: code.err_code().error_name.to_string(),
            error_type: code.type_(),
            error_link: code.url(),
        }
    }
}

impl fmt::Display for ResponseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.message.fmt(f)
    }
}

impl std::error::Error for ResponseError {}

impl<T> From<T> for ResponseError
where
    T: ErrorCode,
{
    fn from(other: T) -> Self {
        Self {
            code: other.http_status(),
            message: other.to_string(),
            error_code: other.error_name(),
            error_type: other.error_type(),
            error_link: other.error_url(),
        }
    }
}

impl aweb::error::ResponseError for ResponseError {
    fn error_response(&self) -> aweb::HttpResponse<Body> {
        let json = serde_json::to_vec(self).unwrap();
        HttpResponseBuilder::new(self.status_code())
            .content_type("application/json")
            .body(json)
    }

    fn status_code(&self) -> StatusCode {
        self.code
    }
}

pub trait ErrorCode: std::error::Error {
    fn error_code(&self) -> Code;
@@ -38,20 +107,20 @@ impl fmt::Display for ErrorType {
        use ErrorType::*;

        match self {
            InternalError => write!(f, "internal_error"),
            InvalidRequestError => write!(f, "invalid_request_error"),
            AuthenticationError => write!(f, "authentication_error"),
            InternalError => write!(f, "internal"),
            InvalidRequestError => write!(f, "invalid_request"),
            AuthenticationError => write!(f, "auth"),
        }
    }
}

#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
    // index related error
    CreateIndex,
    IndexAlreadyExists,
    IndexNotFound,
    InvalidIndexUid,
    OpenIndex,

    // invalid state error
    InvalidState,
@@ -60,26 +129,46 @@ pub enum Code {

    MaxFieldsLimitExceeded,
    MissingDocumentId,
    InvalidDocumentId,

    Facet,
    Filter,
    Sort,

    BadParameter,
    BadRequest,
    DatabaseSizeLimitReached,
    DocumentNotFound,
    Internal,
    InvalidGeoField,
    InvalidRankingRule,
    InvalidStore,
    InvalidToken,
    Maintenance,
    MissingAuthorizationHeader,
    NotFound,
    NoSpaceLeftOnDevice,
    DumpNotFound,
    TaskNotFound,
    PayloadTooLarge,
    RetrieveDocument,
    SearchDocuments,
    UnsupportedMediaType,

    DumpAlreadyInProgress,
    DumpProcessFailed,

    InvalidContentType,
    MissingContentType,
    MalformedPayload,
    MissingPayload,

    ApiKeyNotFound,
    MissingParameter,
    InvalidApiKeyActions,
    InvalidApiKeyIndexes,
    InvalidApiKeyExpiresAt,
    InvalidApiKeyDescription,
}

impl Code {
    /// Associate a `Code` variant to the actual ErrCode
    fn err_code(&self) -> ErrCode {
        use Code::*;
@@ -87,41 +176,102 @@ impl Code {
        match self {
            // index related errors
            // create index is thrown on internal error while creating an index.
            CreateIndex => ErrCode::internal("index_creation_failed", StatusCode::BAD_REQUEST),
            IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::BAD_REQUEST),
            CreateIndex => {
                ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR)
            }
            IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT),
            // thrown when requesting an index that does not exist
            IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
            InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST),
            OpenIndex => ErrCode::internal("index_not_accessible", StatusCode::INTERNAL_SERVER_ERROR),

            // invalid state error
            InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR),
            // thrown when no primary key has been set
            MissingPrimaryKey => ErrCode::invalid("missing_primary_key", StatusCode::BAD_REQUEST),
            MissingPrimaryKey => {
                ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST)
            }
            // error thrown when trying to set an already existing primary key
            PrimaryKeyAlreadyPresent => ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST),
            PrimaryKeyAlreadyPresent => {
                ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST)
            }
            // invalid ranking rule
            InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST),

            // invalid database
            InvalidStore => {
                ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR)
            }

            // invalid document
            MaxFieldsLimitExceeded => ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST),
            MaxFieldsLimitExceeded => {
                ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST)
            }
            MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST),
            InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST),

            // error related to facets
            Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST),
            // error related to filters
            Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
            // error related to sorts
            Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),

            BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
            BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
            DatabaseSizeLimitReached => ErrCode::internal(
                "database_size_limit_reached",
                StatusCode::INTERNAL_SERVER_ERROR,
            ),
            DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
            Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
            InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN),
            Maintenance => ErrCode::internal("maintenance", StatusCode::SERVICE_UNAVAILABLE),
            MissingAuthorizationHeader => ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED),
            NotFound => ErrCode::invalid("not_found", StatusCode::NOT_FOUND),
            InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST),
            InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN),
            MissingAuthorizationHeader => {
                ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED)
            }
            TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND),
            DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
            NoSpaceLeftOnDevice => {
                ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
            }
            PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
            RetrieveDocument => ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST),
            RetrieveDocument => {
                ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
            }
            SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST),
            UnsupportedMediaType => ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE),
            UnsupportedMediaType => {
                ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
            }

            // error related to dump
            DumpAlreadyInProgress => {
                ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT)
            }
            DumpProcessFailed => {
                ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR)
            }
            MissingContentType => {
                ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
            }
            MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST),
            InvalidContentType => {
                ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
            }
            MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST),

            // error related to keys
            ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND),
            MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST),
            InvalidApiKeyActions => {
                ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST)
            }
            InvalidApiKeyIndexes => {
                ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST)
            }
            InvalidApiKeyExpiresAt => {
                ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST)
            }
            InvalidApiKeyDescription => {
                ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST)
            }
        }
    }

@@ -178,3 +328,27 @@ impl ErrCode {
        }
    }
}

#[cfg(feature = "test-traits")]
mod strategy {
    use proptest::strategy::Strategy;

    use super::*;

    pub(super) fn status_code_strategy() -> impl Strategy<Value = StatusCode> {
        (100..999u16).prop_map(|i| StatusCode::from_u16(i).unwrap())
    }
}

#[macro_export]
macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
        $(
            impl From<$other> for $target {
                fn from(other: $other) -> Self {
                    Self::Internal(Box::new(other))
                }
            }
        )*
    }
}
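// Illustrative usage (not part of the original file): for an error enum with an
// `Internal(Box<...>)`-style variant, the macro generates the `From` impls that
// wrap foreign errors into it, e.g.
// `internal_error!(Error: heed::Error, std::io::Error);`
// (the error types named here are only examples).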
|